Source code for syne_tune.experiments.launchers.hpo_main_sagemaker

# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.
import logging
from typing import Optional

from syne_tune.experiments.baselines import MethodDefinitions
from syne_tune.experiments.launchers.hpo_main_common import (
    ExtraArgsType,
    MapMethodArgsType,
    ConfigDict,
    extra_metadata,
    DictStrKey,
    str2bool,
    config_from_argparse,
)
from syne_tune.experiments.launchers.hpo_main_local import (
    RealBenchmarkDefinitions,
    get_benchmark,
    create_objects_for_tuner,
    LOCAL_AND_SAGEMAKER_BACKEND_EXTRA_PARAMETERS,
)
from syne_tune.experiments.launchers.launch_remote_common import (
    sagemaker_estimator_args,
)
from syne_tune.experiments.launchers.utils import (
    get_master_random_seed,
)
from syne_tune.backend import SageMakerBackend
from syne_tune.backend.sagemaker_backend.sagemaker_utils import (
    default_sagemaker_session,
)
from syne_tune.remote.estimators import sagemaker_estimator
from syne_tune.results_callback import ExtraResultsComposer
from syne_tune.tuner import Tuner

# SageMaker managed warm pools:
# https://docs.aws.amazon.com/sagemaker/latest/dg/train-warm-pools.html#train-warm-pools-resource-limits
# Maximum time a warm pool instance is kept alive, waiting to be associated with
# a new job. Setting this too large may lead to extra costs.
WARM_POOL_KEEP_ALIVE_PERIOD_IN_SECONDS = 10 * 60


SAGEMAKER_BACKEND_ONLY_EXTRA_PARAMETERS = [
    dict(
        name="max_failures",
        type=int,
        default=3,
        help="Number of trials which can fail without experiment being terminated",
    ),
    dict(
        name="warm_pool",
        type=str2bool,
        default=True,
        help=(
            "If 1, the SageMaker managed warm pools feature is used. "
            "This can be more expensive, but also reduces startup "
            "delays, leading to an experiment finishing in less time"
        ),
    ),
    dict(
        name="start_jobs_without_delay",
        type=str2bool,
        default=False,
        help=(
            "If 1, the tuner starts new trials immediately after "
            "sending existing ones a stop signal. This leads to more "
            "than n_workers instances being used during certain times, "
            "which can lead to quotas being exceeded, or the warm pool "
            "feature not working optimal."
        ),
    ),
]


SAGEMAKER_BACKEND_EXTRA_PARAMETERS = (
    LOCAL_AND_SAGEMAKER_BACKEND_EXTRA_PARAMETERS
    + SAGEMAKER_BACKEND_ONLY_EXTRA_PARAMETERS
)



[docs]
def start_experiment_sagemaker_backend(
    configuration: ConfigDict,
    methods: MethodDefinitions,
    benchmark_definitions: RealBenchmarkDefinitions,
    extra_results: Optional[ExtraResultsComposer] = None,
    map_method_args: Optional[MapMethodArgsType] = None,
    extra_tuning_job_metadata: Optional[DictStrKey] = None,
):
    """
    Runs experiment with SageMaker backend.

    ``map_method_args`` can be used to modify ``method_kwargs`` for constructing
    :class:`~syne_tune.experiments.baselines.MethodArguments`, depending on
    ``configuration`` and the method. This allows for extra flexibility to specify specific arguments for chosen methods
    Its signature is :code:`method_kwargs = map_method_args(configuration, method, method_kwargs)`,
    where ``method`` is the name of the baseline.

    :param configuration: ConfigDict with parameters of the experiment.
        Must contain all parameters from SAGEMAKER_BACKEND_EXTRA_PARAMETERS
    :param methods: Dictionary with method constructors.
    :param benchmark_definitions: Definitions of benchmarks; one is selected from
        command line arguments
    :param extra_results: If given, this is used to append extra information to the
        results dataframe
    :param map_method_args: See above, optional
    :param extra_tuning_job_metadata: Metadata added to the tuner, can be used to manage results
    """
    configuration.check_if_all_paremeters_present(SAGEMAKER_BACKEND_EXTRA_PARAMETERS)
    configuration.expand_base_arguments(SAGEMAKER_BACKEND_EXTRA_PARAMETERS)

    experiment_tag = configuration.experiment_tag
    benchmark = get_benchmark(
        configuration, benchmark_definitions, sagemaker_backend=True
    )
    benchmark_name = configuration.benchmark
    master_random_seed = get_master_random_seed(configuration.random_seed)
    method_names = list(methods.keys())

    assert (
        len(method_names) == 1 and len(configuration.seeds) == 1
    ), "Can only launch single (method, seed). Use launch_remote to launch several combinations"
    method = method_names[0]
    seed = configuration.seeds[0]
    logging.getLogger().setLevel(logging.INFO)
    print(
        f"Starting experiment ({method}/{benchmark_name}/{seed}) of {experiment_tag}"
        f"  max_wallclock_time = {benchmark.max_wallclock_time}, "
        f"  n_workers = {benchmark.n_workers}"
    )

    sm_args = sagemaker_estimator_args(
        entry_point=benchmark.script,
        experiment_tag="A",
        tuner_name="B",
        benchmark=benchmark,
    )
    del sm_args["checkpoint_s3_uri"]
    sm_args["sagemaker_session"] = default_sagemaker_session()
    if configuration.warm_pool:
        print(
            "--------------------------------------------------------------------------\n"
            "Using SageMaker managed warm pools in order to decrease start-up delays.\n"
            f"In order for this to work, you need to have at least {benchmark.n_workers} quotas of the type\n"
            f"   {benchmark.instance_type} for training warm pool usage\n"
            "--------------------------------------------------------------------------"
        )
        sm_args["keep_alive_period_in_seconds"] = WARM_POOL_KEEP_ALIVE_PERIOD_IN_SECONDS
    if configuration.instance_type is not None:
        sm_args["instance_type"] = configuration.instance_type
    trial_backend = SageMakerBackend(
        sm_estimator=sagemaker_estimator[benchmark.framework](**sm_args),
        # names of metrics to track. Each metric will be detected by Sagemaker if it is written in the
        # following form: "[RMSE]: 1.2", see in train_main_example how metrics are logged for an example
        delete_checkpoints=configuration.delete_checkpoints,
        metrics_names=[benchmark.metric],
    )

    tuner_kwargs = create_objects_for_tuner(
        configuration,
        methods=methods,
        method=method,
        benchmark=benchmark,
        master_random_seed=master_random_seed,
        seed=seed,
        verbose=True,
        extra_tuning_job_metadata=extra_tuning_job_metadata,
        map_method_args=map_method_args,
        extra_results=extra_results,
    )
    tuner = Tuner(
        trial_backend=trial_backend,
        **tuner_kwargs,
        sleep_time=5.0,
        max_failures=configuration.max_failures,
        start_jobs_without_delay=configuration.start_jobs_without_delay,
    )
    tuner.run()




[docs]
def main(
    methods: MethodDefinitions,
    benchmark_definitions: RealBenchmarkDefinitions,
    extra_args: Optional[ExtraArgsType] = None,
    map_method_args: Optional[MapMethodArgsType] = None,
    extra_results: Optional[ExtraResultsComposer] = None,
):
    """
    Runs experiment with SageMaker backend.

    Command line arguments must specify a single benchmark, method, and seed,
    for example ``--method ASHA --num_seeds 5 --start_seed 4`` starts experiment
    with ``seed=4``, or ``--method ASHA --num_seeds 1`` starts experiment with
    ``seed=0``. Here, ``ASHA`` must be key in ``methods``.

    ``map_method_args`` can be used to modify ``method_kwargs`` for constructing
    :class:`~syne_tune.experiments.baselines.MethodArguments`, depending on
    ``configuration`` returned by :func:`parse_args` and the method. Its
    signature is
    :code:`method_kwargs = map_method_args(configuration, method, method_kwargs)`,
    where ``method`` is the name of the baseline. It is called just before the
    method is created.

    :param methods: Dictionary with method constructors
    :param benchmark_definitions: Definitions of benchmark; one is selected from
        command line arguments
    :param extra_args: Extra arguments for command line parser. Optional
    :param map_method_args: See above. Needed if ``extra_args`` is given
    :param extra_results: If given, this is used to append extra information to the
        results dataframe
    """
    configuration = config_from_argparse(extra_args, SAGEMAKER_BACKEND_EXTRA_PARAMETERS)
    method_names = (
        [configuration.method]
        if configuration.method is not None
        else list(methods.keys())
    )
    methods = {mname: methods[mname] for mname in method_names}
    if extra_args is not None:
        assert (
            map_method_args is not None
        ), "map_method_args must be specified if extra_args is used"

    start_experiment_sagemaker_backend(
        configuration,
        methods=methods,
        benchmark_definitions=benchmark_definitions,
        map_method_args=map_method_args,
        extra_results=extra_results,
        extra_tuning_job_metadata=None
        if extra_args is None
        else extra_metadata(configuration, extra_args),
    )