Code in benchmarking/examples/benchmark_hypertune

This example compares Hyper-Tune against a number of baselines. It also demonstrates how results can be plotted.

benchmarking/examples/benchmark_hypertune/baselines.py
from syne_tune.experiments.default_baselines import (
    ASHA,
    MOBSTER,
    HyperTune,
    SyncHyperband,
    SyncBOHB,
)


class Methods:
    ASHA = "ASHA"
    MOBSTER_JOINT = "MOBSTER-JOINT"
    MOBSTER_INDEP = "MOBSTER-INDEP"
    HYPERTUNE_INDEP = "HYPERTUNE-INDEP"
    HYPERTUNE_JOINT = "HYPERTUNE-JOINT"
    SYNCHB = "SYNCHB"
    BOHB = "BOHB"


methods = {
    Methods.ASHA: lambda method_arguments: ASHA(
        method_arguments,
        type="promotion",
    ),
    Methods.MOBSTER_JOINT: lambda method_arguments: MOBSTER(
        method_arguments,
        type="promotion",
    ),
    Methods.MOBSTER_INDEP: lambda method_arguments: MOBSTER(
        method_arguments,
        type="promotion",
        search_options=dict(model="gp_independent"),
    ),
    Methods.HYPERTUNE_INDEP: lambda method_arguments: HyperTune(
        method_arguments,
        type="promotion",
        search_options=dict(model="gp_independent"),
    ),
    Methods.HYPERTUNE_JOINT: lambda method_arguments: HyperTune(
        method_arguments,
        type="promotion",
        search_options=dict(model="gp_multitask"),
    ),
    Methods.SYNCHB: lambda method_arguments: SyncHyperband(method_arguments),
    Methods.BOHB: lambda method_arguments: SyncBOHB(method_arguments),
}
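
Each value of ``methods`` is a factory which maps a ``MethodArguments`` object (assembled by the launcher in ``hpo_main.py``) to a configured scheduler; the factories are only called inside the launcher. The INDEP variants use independent surrogate models per rung level (``model="gp_independent"``), the JOINT variants a joint multi-task model (``model="gp_multitask"``). A minimal sketch, run from this directory, which only inspects the dictionary:

from baselines import Methods, methods

# The factories are not called here: they require a MethodArguments object,
# which is assembled by the launcher in hpo_main.py
for name in (Methods.HYPERTUNE_INDEP, Methods.HYPERTUNE_JOINT):
    print(name, methods[name])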
benchmarking/examples/benchmark_hypertune/benchmark_definitions.py
from syne_tune.experiments.benchmark_definitions import (
    nas201_benchmark_definitions,
    fcnet_benchmark_definitions,
    lcbench_selected_benchmark_definitions,
)


benchmark_definitions = {
    **nas201_benchmark_definitions,
    **fcnet_benchmark_definitions,
    **lcbench_selected_benchmark_definitions,
}
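
The keys of the merged ``benchmark_definitions`` dictionary are the benchmark names used when launching experiments, and each value provides metadata such as ``metric`` and ``mode``, which are read in ``plot_results.py`` below. A minimal sketch of a lookup (run from this directory; only attributes used further below are accessed):

from benchmark_definitions import benchmark_definitions

# "nas201-cifar100" is one of the benchmarks plotted in plot_results.py
benchmark = benchmark_definitions["nas201-cifar100"]
print(benchmark.metric, benchmark.mode)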
benchmarking/examples/benchmark_hypertune/hpo_main.py
from typing import Dict, Any

from baselines import methods
from benchmark_definitions import benchmark_definitions
from syne_tune.experiments.launchers.hpo_main_simulator import main
from syne_tune.util import recursive_merge


extra_args = [
    dict(
        name="num_brackets",
        type=int,
        help="Number of brackets",
    ),
    dict(
        name="num_samples",
        type=int,
        default=50,
        help="Number of samples for Hyper-Tune distribution",
    ),
]


def map_method_args(args, method: str, method_kwargs: Dict[str, Any]) -> Dict[str, Any]:
    if method.startswith("HYPERTUNE"):
        scheduler_kwargs = {
            "search_options": {"hypertune_distribution_num_samples": args.num_samples},
        }
    else:
        scheduler_kwargs = dict()
    if args.num_brackets is not None:
        scheduler_kwargs["brackets"] = args.num_brackets
    if scheduler_kwargs:
        method_kwargs = recursive_merge(
            method_kwargs, {"scheduler_kwargs": scheduler_kwargs}
        )
    return method_kwargs


if __name__ == "__main__":
    main(methods, benchmark_definitions, extra_args, map_method_args)
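
``map_method_args`` is called for each method before it is constructed, merging the extra command line arguments into ``method_kwargs``. A hedged sketch of its effect, using a stand-in for the parsed arguments:

from types import SimpleNamespace

from hpo_main import map_method_args

# Stand-in for the parsed arguments --num_brackets 1 --num_samples 50
args = SimpleNamespace(num_brackets=1, num_samples=50)
print(map_method_args(args, "HYPERTUNE-INDEP", dict()))
# scheduler_kwargs should contain brackets=1 and
# search_options={"hypertune_distribution_num_samples": 50}
print(map_method_args(args, "ASHA", dict()))
# scheduler_kwargs should contain brackets=1 only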
benchmarking/examples/benchmark_hypertune/launch_remote.py
from pathlib import Path

from benchmark_definitions import benchmark_definitions
from baselines import methods
from hpo_main import extra_args
from syne_tune.experiments.launchers.launch_remote_simulator import launch_remote


if __name__ == "__main__":

    def _is_expensive_method(method: str) -> bool:
        return method.startswith("MOBSTER") or method.startswith("HYPERTUNE")

    entry_point = Path(__file__).parent / "hpo_main.py"
    launch_remote(
        entry_point=entry_point,
        methods=methods,
        benchmark_definitions=benchmark_definitions,
        extra_args=extra_args,
        is_expensive_method=_is_expensive_method,
    )
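
``_is_expensive_method`` flags the model-based methods (the MOBSTER and Hyper-Tune variants); the remote launcher uses this to decide how experiments are distributed over SageMaker training jobs (for example, expensive methods may get one job per seed, while seeds of cheap methods can share a job). A small illustrative sketch of the predicate:

from baselines import methods

# Methods counted as expensive under this predicate
expensive = [name for name in methods if name.startswith(("MOBSTER", "HYPERTUNE"))]
print(expensive)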
benchmarking/examples/benchmark_hypertune/plot_results.py
from typing import Dict, Any, Optional
import logging

from baselines import methods
from benchmark_definitions import benchmark_definitions
from syne_tune.experiments import ComparativeResults, PlotParameters, SubplotParameters


def metadata_to_setup(metadata: Dict[str, Any]) -> Optional[str]:
    # The setup is the algorithm. No filtering
    return metadata["algorithm"]


SETUPS_RIGHT = ("ASHA", "SYNCHB", "BOHB")


def metadata_to_subplot(metadata: Dict[str, Any]) -> Optional[int]:
    return int(metadata["algorithm"] in SETUPS_RIGHT)


if __name__ == "__main__":
    logging.getLogger().setLevel(logging.INFO)
    experiment_name = "docs-1"
    experiment_names = (experiment_name,)
    setups = list(methods.keys())
    num_runs = 15
    download_from_s3 = False  # Set ``True`` in order to download files from S3
    # Plot parameters across all benchmarks
    plot_params = PlotParameters(
        xlabel="wall-clock time",
        aggregate_mode="iqm_bootstrap",
        grid=True,
    )
    # We would like two subplots (1 row, 2 columns), with MOBSTER and HYPERTUNE
    # results on the left, and the remaining baselines on the right. Each
    # column gets its own title, and legends are shown in both
    plot_params.subplots = SubplotParameters(
        nrows=1,
        ncols=2,
        kwargs=dict(sharey="all"),
        titles=["Model-based Methods", "Baselines"],
        legend_no=[0, 1],
    )
    # The creation of ``results`` downloads files from S3 (only if
    # ``download_from_s3 == True``), reads the metadata and creates an inverse
    # index. If any result files are missing, or there are too many of them,
    # warning messages are printed
    results = ComparativeResults(
        experiment_names=experiment_names,
        setups=setups,
        num_runs=num_runs,
        metadata_to_setup=metadata_to_setup,
        plot_params=plot_params,
        metadata_to_subplot=metadata_to_subplot,
        download_from_s3=download_from_s3,
    )
    # We can now create plots for the different benchmarks
    # First: nas201-cifar100
    benchmark_name = "nas201-cifar100"
    benchmark = benchmark_definitions[benchmark_name]
    # These parameters overwrite those given at construction
    plot_params = PlotParameters(
        metric=benchmark.metric,
        mode=benchmark.mode,
        ylim=(0.265, 0.31),
    )
    results.plot(
        benchmark_name=benchmark_name,
        plot_params=plot_params,
        file_name=f"./{experiment_name}-{benchmark_name}.png",
    )
    # Next: nas201-ImageNet16-120
    benchmark_name = "nas201-ImageNet16-120"
    benchmark = benchmark_definitions[benchmark_name]
    # These parameters overwrite those given at construction
    plot_params = PlotParameters(
        metric=benchmark.metric,
        mode=benchmark.mode,
        ylim=(0.535, 0.58),
    )
    results.plot(
        benchmark_name=benchmark_name,
        plot_params=plot_params,
        file_name=f"./{experiment_name}-{benchmark_name}.png",
    )
benchmarking/examples/benchmark_hypertune/requirements.txt
syne-tune[gpsearchers,kde,blackbox-repository,aws]
tqdm