Source code for benchmarking.examples.fine_tuning_transformer_glue.plot_results

# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.
from typing import Dict, Any, Optional

from benchmarking.examples.fine_tuning_transformer_glue.baselines import methods
from benchmarking.benchmark_definitions import (
    real_benchmark_definitions,
)
from syne_tune.experiments import (
    ComparativeResults,
    PlotParameters,
    ShowTrialParameters,
)


SETUPS = list(methods.keys())
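# Note (not in the original script): the setup names are the keys of
# ``methods`` imported from ``baselines``; the code below assumes "BO" is
# among them, since it is referenced by ``show_init_trials``.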


def metadata_to_setup(metadata: Dict[str, Any]) -> Optional[str]:
    # The setup is the algorithm. No filtering
    return metadata["algorithm"]
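
# A minimal sketch (not part of the original script) of how
# ``metadata_to_setup`` behaves: each stored experiment carries a metadata
# dict, and the setup name is read from its "algorithm" entry. The extra
# "seed" key below is a hypothetical example of other metadata entries.
#
#     metadata = {"algorithm": "BO", "seed": 0}
#     assert metadata_to_setup(metadata) == "BO"
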
if __name__ == "__main__":
    experiment_name = "glue-6"
    experiment_names = (experiment_name,)
    num_runs = 5
    download_from_s3 = False  # Set ``True`` in order to download files from S3
    # Plot parameters across all benchmarks
    plot_params = PlotParameters(
        xlabel="wall-clock time",
        aggregate_mode="iqm_bootstrap",
        grid=True,
    )
    # We also show the performance of the initial trial, which corresponds to
    # the Hugging Face default
    plot_params.show_init_trials = ShowTrialParameters(
        setup_name="BO",
        trial_id=0,
        new_setup_name="default",
    )
    # The creation of ``results`` downloads files from S3 (only if
    # ``download_from_s3 == True``), reads the metadata and creates an inverse
    # index. If any result files are missing, or there are too many of them,
    # warning messages are printed
    results = ComparativeResults(
        experiment_names=experiment_names,
        setups=SETUPS,
        num_runs=num_runs,
        metadata_to_setup=metadata_to_setup,
        plot_params=plot_params,
        download_from_s3=download_from_s3,
    )
    # We can now create plots for the different benchmarks
    for dataset, ylim in [
        ("rte", (0.27, 0.38)),
        ("mrpc", (0.09, 0.15)),
        ("stsb", (0.1, 0.15)),
    ]:
        for do_modsel in [False, True]:
            if do_modsel:
                benchmark_name = f"finetune_transformer_glue_modsel_{dataset}"
                title = f"Fine-tuning and model selection on GLUE {dataset}"
            else:
                benchmark_name = f"finetune_transformer_glue_{dataset}"
                title = f"Fine-tuning bert-base-cased on GLUE {dataset}"
            benchmark = real_benchmark_definitions()[benchmark_name]
            # These parameters overwrite those given at construction
            plot_params = PlotParameters(
                title=title,
                metric=benchmark.metric,
                mode=benchmark.mode,
                ylim=ylim,
            )
            results.plot(
                benchmark_name=benchmark_name,
                plot_params=plot_params,
                file_name=f"./{benchmark_name}.png",
            )
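
# Usage sketch (assumptions: results for experiment "glue-6" with 5 repeated
# runs exist locally, or on S3 if ``download_from_s3 = True`` is set above):
#
#     python plot_results.py
#
# One figure is then written per benchmark into the working directory, e.g.
# ./finetune_transformer_glue_rte.png and
# ./finetune_transformer_glue_modsel_rte.png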