Source code for syne_tune.experiments.visualization.plot_per_trial

# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.
from typing import Optional, Tuple, Union, List, Iterable, Dict
import logging
import copy
from dataclasses import dataclass

import numpy as np
import pandas as pd

from syne_tune.constants import (
    ST_TUNER_TIME,
)
from syne_tune.experiments.visualization.plotting import PlotParameters
from syne_tune.experiments.visualization.results_utils import (
    MapMetadataToSetup,
    DateTimeBounds,
    create_index_for_result_files,
    load_results_dataframe_per_benchmark,
    download_result_files_from_s3,
    SINGLE_BENCHMARK_KEY,
)
from syne_tune.try_import import try_import_visual_message
from syne_tune.util import is_increasing, is_positive_integer

try:
    import matplotlib.pyplot as plt
except ImportError:
    print(try_import_visual_message())

logger = logging.getLogger(__name__)


@dataclass
class MultiFidelityParameters:
    """
    Parameters configuring the multi-fidelity version of
    :class:`TrialsOfExperimentResults`.

    ``multifidelity_setups`` contains names of setups which are multi-fidelity,
    the remaining ones are single-fidelity. It can also be a dictionary,
    mapping a multi-fidelity setup name to ``True`` if this is a
    pause-and-resume method (these are visualized differently), ``False``
    otherwise (early stopping method).

    :param rung_levels: See above. Positive integers, increasing
    :param multifidelity_setups: See above
    """

    rung_levels: List[int]
    multifidelity_setups: Union[List[str], Dict[str, bool]]

    def check_params(self, setups: Iterable[str]):
        if isinstance(self.multifidelity_setups, dict):
            _mf_setups = list(self.multifidelity_setups.keys())
        else:
            _mf_setups = self.multifidelity_setups
        assert set(setups).issuperset(_mf_setups), (
            f"multi_fidelity_params.multifidelity_setups = {self.multifidelity_setups} "
            f"must be contained in setups = {setups}"
        )
        assert is_increasing(self.rung_levels) and is_positive_integer(
            self.rung_levels
        ), f"multi_fidelity_params.rung_levels = {self.rung_levels} must be increasing positive ints"

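# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the module source): one way the dataclass
# above might be filled in. The rung levels and setup names are hypothetical
# and must match how the experiments were actually run.
#
#     multi_fidelity_params = MultiFidelityParameters(
#         rung_levels=[1, 3, 9, 27],
#         # dict form: True marks pause-and-resume setups, False early stopping
#         multifidelity_setups={"ASHA-PROM": True, "ASHA-STOP": False},
#     )
#     multi_fidelity_params.check_params(setups=["ASHA-PROM", "ASHA-STOP", "RS"])
# ---------------------------------------------------------------------------
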
class TrialsOfExperimentResults:
    """
    This class loads, processes, and plots metric results for single
    experiments, where the curves for different trials have different colours.
    Compared to :class:`~syne_tune.experiments.ComparativeResults`, each
    subfigure uses data from a single experiment (one benchmark, one seed, one
    setup). Both benchmark and seed need to be chosen in :meth:`plot`.

    If there are different setups, they give rise to subfigures. If
    ``plot_params.subplots`` is not given, the arrangement is one row with
    columns corresponding to setups, and setup names as titles. Specify
    ``plot_params.subplots`` in order to change this arrangement (e.g., to
    have more than one row). Setups can be selected by using
    ``plot_params.subplots.subplot_indices``. Also, if
    ``plot_params.subplots.titles`` is not given, we use setup names, and each
    subplot gets its own title (``plot_params.subplots.title_each_figure`` is
    ignored).

    For ``plot_params``, we use the same
    :class:`~syne_tune.experiments.PlotParameters` as in
    :class:`~syne_tune.experiments.ComparativeResults`, but some fields are
    not used here (``title``, ``aggregate_mode``, ``show_one_trial``,
    ``subplots.legend_no``, ``subplots.xlims``).

    :param experiment_names: Tuple of experiment names (prefixes, without the
        timestamps)
    :param setups: Possible values of setup names
    :param metadata_to_setup: See above
    :param plot_params: Parameters controlling the plot. Can be overwritten in
        :meth:`plot`. See :class:`PlotParameters`
    :param multi_fidelity_params: If given, we use a special variant tailored
        to multi-fidelity methods (see :meth:`plot`)
    :param benchmark_key: Key for benchmark in metadata files. Defaults to
        "benchmark". If this is ``None``, there is only a single benchmark,
        and all results are merged together
    :param seed_key: Key for seed in metadata files. Defaults to "seed"
    :param with_subdirs: See above. Defaults to "*"
    :param datetime_bounds: See above
    :param download_from_s3: Should result files be downloaded from S3? This
        is supported only if ``with_subdirs``
    :param s3_bucket: Only if ``download_from_s3 == True``. If not given, the
        default bucket for the SageMaker session is used
    """

    def __init__(
        self,
        experiment_names: Tuple[str, ...],
        setups: Iterable[str],
        metadata_to_setup: MapMetadataToSetup,
        plot_params: Optional[PlotParameters] = None,
        multi_fidelity_params: Optional[MultiFidelityParameters] = None,
        benchmark_key: Optional[str] = "benchmark",
        seed_key: str = "seed",
        with_subdirs: Optional[Union[str, List[str]]] = "*",
        datetime_bounds: Optional[DateTimeBounds] = None,
        download_from_s3: bool = False,
        s3_bucket: Optional[str] = None,
    ):
        assert setups, "setups must not be empty"
        if multi_fidelity_params is not None:
            multi_fidelity_params.check_params(setups)
        assert seed_key is not None, "seed_key must not be None"
        if download_from_s3:
            assert (
                with_subdirs is not None
            ), "Cannot download files from S3 if with_subdirs=None"
            download_result_files_from_s3(experiment_names, s3_bucket)
        result = create_index_for_result_files(
            experiment_names=experiment_names,
            metadata_to_setup=metadata_to_setup,
            benchmark_key=benchmark_key,
            with_subdirs=with_subdirs,
            datetime_bounds=datetime_bounds,
            seed_key=seed_key,
        )
        self._reverse_index = result["index"]
        assert result["setup_names"] == set(setups), (
            f"Filtered results contain setup names {result['setup_names']}, "
            f"but should contain setup names {setups}"
        )
        self.setups = tuple(setups)
        self._default_plot_params = copy.deepcopy(plot_params)
        self._benchmark_key = benchmark_key
        if multi_fidelity_params is not None:
            self._multifidelity_setups = multi_fidelity_params.multifidelity_setups
            if not isinstance(self._multifidelity_setups, dict):
                self._multifidelity_setups = {
                    name: False for name in self._multifidelity_setups
                }
            # We need rung levels minus 1 below
            self._rung_levels = [
                level - 1 for level in multi_fidelity_params.rung_levels
            ]
        else:
            self._multifidelity_setups = dict()
            self._rung_levels = []

    def _plot_figure(
        self,
        df: pd.DataFrame,
        plot_params: PlotParameters,
        benchmark_name: Optional[str],
        seed: int,
    ):
        subplots = plot_params.subplots
        if subplots is not None:
            nrows = subplots.nrows
            ncols = subplots.ncols
            subplot_indices = (
                list(range(len(self.setups)))
                if subplots.subplot_indices is None
                else subplots.subplot_indices
            )
            assert ncols * nrows >= len(
                subplot_indices
            ), f"Error in subplots.kwargs: ncols times nrows must be >= {len(subplot_indices)} (number of setups)"
            subplots_kwargs = dict(
                dict() if subplots.kwargs is None else subplots.kwargs,
                nrows=nrows,
                ncols=ncols,
            )
            if subplots.titles is not None:
                subplot_titles = subplots.titles
                title_each_figure = subplots.title_each_figure
            else:
                # If ``plot_params.subplots.titles`` is not given, we use setup
                # names as titles. In this case, each subfigure has its own
                # title, not just each column
                subplot_titles = [self.setups[ind] for ind in subplot_indices]
                title_each_figure = True
        else:
            nrows = 1
            ncols = len(self.setups)
            subplots_kwargs = dict(nrows=nrows, ncols=ncols, sharey="all")
            subplot_titles = self.setups
            title_each_figure = False
            subplot_indices = list(range(ncols))
        ylim = plot_params.ylim
        xlim = plot_params.xlim
        xlabel = plot_params.xlabel
        ylabel = plot_params.ylabel
        tick_params = plot_params.tick_params
        msg_prefix = f"seed = {seed}: "
        if benchmark_name is not None:
            msg_prefix = f"benchmark_name = {benchmark_name}, " + msg_prefix
        num_rungs = len(self._rung_levels)

        plt.figure(dpi=plot_params.dpi)
        figsize = (5 * ncols, 4 * nrows)
        fig, axs = plt.subplots(**subplots_kwargs, squeeze=False, figsize=figsize)
        for setup_name, setup_df in df.groupby("setup_name"):
            # Check that there is a single experiment per setup
            tuner_names = list(setup_df.tuner_name.unique())
            assert len(tuner_names) == 1, (
                msg_prefix
                + f"For setup_name = {setup_name} found tuner_names = {tuner_names}"
            )
            logger.info(msg_prefix + f"setup_name = {setup_name}: {tuner_names[0]}")
            is_multi_fidelity = setup_name in self._multifidelity_setups
            pause_resume = (
                self._multifidelity_setups[setup_name] if is_multi_fidelity else False
            )
            subplot_index = self.setups.index(setup_name)
            if subplot_index not in subplot_indices:
                continue
            subplot_no = subplot_indices.index(subplot_index)
            row = subplot_no % nrows
            col = subplot_no // nrows
            ax = axs[row, col]
            current_color = [0] * (num_rungs + 1)
            for trial_id in setup_df.trial_id.unique():
                sub_df = setup_df[setup_df["trial_id"] == trial_id]
                y = np.array(sub_df[plot_params.metric])
                rt = np.array(sub_df[ST_TUNER_TIME])
                sz = y.size
                if is_multi_fidelity:
                    rungs_here = [x for x in self._rung_levels if x < sz]
                    col_ind = len(rungs_here)
                else:
                    rungs_here = None
                    col_ind = 0
                color = f"C{current_color[col_ind]}"
                current_color[col_ind] += 1
                if not pause_resume and sz > 1:
                    ax.plot(rt, y, "-", color=color)
                else:
                    # Pause and resume: Plot different pieces
                    ranges = [
                        (a + 1, b + 1)
                        for a, b in zip(rungs_here[:-1], rungs_here[1:])
                    ]
                    if len(rungs_here) < num_rungs:
                        a = rungs_here[-1] + 1
                        if a < sz - 1:
                            ranges.append((a, sz))
                    for a, b in ranges:
                        ax.plot(rt[a:b], y[a:b], "-", color=color)
                if is_multi_fidelity:
                    if rungs_here[-1] == sz - 1:
                        final_ind = [sz - 1]
                        rungs_here = rungs_here[:-1]
                        ax.plot(
                            rt[final_ind],
                            y[final_ind],
                            marker="D",
                            markeredgecolor=color,
                            color="none",
                            markersize=3,
                        )
                    ax.plot(
                        rt[rungs_here],
                        y[rungs_here],
                        marker="o",
                        markeredgecolor=color,
                        color="none",
                        markersize=3,
                    )
                else:
                    ax.plot(
                        rt[-1:],
                        y[-1:],
                        marker="o",
                        markeredgecolor=color,
                        color="none",
                        markersize=3,
                    )
            if xlim is not None:
                ax.set_xlim(*xlim)
            if ylim is not None:
                ax.set_ylim(*ylim)
            if xlabel is not None and row == nrows - 1:
                ax.set_xlabel(xlabel)
            if ylabel is not None and col == 0:
                ax.set_ylabel(ylabel)
            if tick_params is not None:
                ax.tick_params(**tick_params)
            if title_each_figure:
                ax.set_title(subplot_titles[subplot_no])
            elif row == 0:
                ax.set_title(subplot_titles[col])
            if plot_params.grid:
                ax.grid(True)
        plt.show()
        return fig, axs

    def plot(
        self,
        benchmark_name: Optional[str] = None,
        seed: int = 0,
        plot_params: Optional[PlotParameters] = None,
        file_name: Optional[str] = None,
    ):
        """
        Creates a plot, whose subfigures show metric data from single
        experiments. In general:

        * Each trial has its own color, which is cycled through periodically.
          The cycling depends on the largest rung level for the trial. This
          avoids neighboring curves having the same color

        For single-fidelity methods (default, ``multi_fidelity_params`` not
        given):

        * The learning curve for a trial ends with 'o'. If it reports only
          once at the end, this is all that is shown for the trial

        For multi-fidelity methods:

        * Learning curves are plotted in contiguous chunks of execution. For
          pause and resume setups (those mapped to ``True`` in
          ``multi_fidelity_params.multifidelity_setups``), they are
          interrupted. Each chunk starts at the epoch after resume and ends
          at the epoch where the trial is paused
        * Values at rung levels are marked as 'o'. If this is the furthest
          the trial got to, the marker is 'D' (diamond)

        Results for different setups are plotted as subfigures, either using
        the setup in ``plot_params.subplots``, or as columns of a single row.

        :param benchmark_name: Name of benchmark for which to plot results.
            Not needed if there is only one benchmark
        :param seed: Seed number. Defaults to 0
        :param plot_params: Parameters controlling the plot. Values provided
            here overwrite values provided at construction
        :param file_name: If given, the figure is stored in a file of this name
        """
        index_key = (
            SINGLE_BENCHMARK_KEY if self._benchmark_key is None else benchmark_name,
            seed,
        )
        assert (
            index_key in self._reverse_index
        ), f"{index_key} not found in index, contains: {list(self._reverse_index.keys())}"
        if plot_params is None:
            plot_params = PlotParameters()
        plot_params = plot_params.merge_defaults(self._default_plot_params)
        if benchmark_name is not None:
            logger.info(f"Load results for benchmark {benchmark_name}, seed {seed}")
        else:
            logger.info(f"Load results for seed {seed}")
        results_df = load_results_dataframe_per_benchmark(
            self._reverse_index[index_key]
        )
        fig, axs = self._plot_figure(
            df=results_df,
            plot_params=plot_params,
            benchmark_name=benchmark_name,
            seed=seed,
        )
        if file_name is not None:
            fig.savefig(file_name, dpi=plot_params.dpi)
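
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the module source): typical usage of
# ``TrialsOfExperimentResults``. The experiment prefix, setup names, metric
# name, metadata field, and benchmark name below are hypothetical;
# ``metadata_to_setup`` has to map each experiment's metadata to one of the
# setup names, or to ``None`` in order to filter the experiment out.
#
#     def metadata_to_setup(metadata: dict) -> Optional[str]:
#         return metadata.get("algorithm")  # assumed metadata field
#
#     results = TrialsOfExperimentResults(
#         experiment_names=("docs-example",),
#         setups=["ASHA-PROM", "RS"],
#         metadata_to_setup=metadata_to_setup,
#         plot_params=PlotParameters(metric="validation_error"),
#         multi_fidelity_params=MultiFidelityParameters(
#             rung_levels=[1, 3, 9], multifidelity_setups={"ASHA-PROM": True}
#         ),
#     )
#     # One subfigure per setup; benchmark and seed select a single experiment
#     results.plot(benchmark_name="fcnet-protein", seed=0, file_name="trials.png")
# ---------------------------------------------------------------------------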