Source code for optilab.plotting.ecdf_curve

"""
Plotting and calculating ECDF curves.
"""

import math
from typing import Dict, List, Tuple

import matplotlib.pyplot as plt
import numpy as np

from ..data_classes import PointList
from .convergence_curve import convergence_curve


[docs] def _ecdf_thresholding( log: List[float], thresholds: List[float], n_dimensions: int, extend_to_len: int | None = None, ) -> Tuple[List[float], List[float]]: """ Perform thresholding of a log with a given list of thresholds. The resulting y is the number of thresholds achieved by the log items. Args: log: Error log of a optimization function. thresholds: ECDF value thresholds. n_dimensions: Dimensionality of optimized function. Returns: x and y values for the curve. """ y = [np.sum(thresholds >= item) / len(thresholds) for item in log] if extend_to_len: if extend_to_len < len(y): raise ValueError( "extend_to_len parameter is lower than lenght of the provided log" ) y.extend([y[-1]] * (extend_to_len - len(y))) x = [(i + 1) / n_dimensions for i in range(len(y))] return x, y
[docs] def ecdf_curve( data: Dict[str, List[PointList]], n_dimensions: int, allowed_error: float, n_thresholds: int = 100, ) -> Dict[str, Tuple[List[float], List[float]]]: """ Calculate ECDF curves. Args: data: Lists of value logs indexed by method name. n_dimensions: Dimensionality of the solved problem. allowed_error: Tolerable error value, used as the last threshold. n_thresholds: Number of ECDF thresholds. Returns: x, y plot points for each method. """ processed_logs = {} log_lengths = {} all_last_items = [] for method, logs in data.items(): processed_logs[method] = [] max_len = 0 for log in logs: new_log = convergence_curve(log) max_len = max(max_len, len(new_log)) processed_log = [math.log10(max(v, allowed_error)) for v in new_log] processed_logs[method].append(processed_log) all_last_items.append(processed_log[-1]) log_lengths[method] = max_len low_value = math.log10(allowed_error) high_value = max(all_last_items) thresholds = np.linspace(low_value, high_value, n_thresholds + 1)[1:] ecdf_data = {} for method, logs in processed_logs.items(): ecdf_ys = [] ecdf_x = [] for log in logs: x, y = _ecdf_thresholding( log, thresholds, n_dimensions, log_lengths[method] ) ecdf_x = x ecdf_ys.append(y) ecdf_avg = np.mean(ecdf_ys, axis=0) ecdf_data[method] = (ecdf_x, ecdf_avg) return ecdf_data
[docs] def plot_ecdf_curves( data: Dict[str, PointList], n_dimensions: int, allowed_error: float, n_thresholds: int = 100, savepath: str | None = None, *, show: bool = True, function_name: str | None = None, ) -> None: """ Calculate and plot ECDF curves. Args: data: Lists of value logs for every method. n_dimensions: Dimensionality of the optimized function. allowed_error: Tolerable error value, used as the last threshold. n_thresholds: Number of ECDF thresholds. savepath: Path to save the plot, optional. show: Wheather to show the plot, default True. function_name: Name of the optimized function, used in title. """ plt.clf() ecdf_data = ecdf_curve(data, n_dimensions, allowed_error, n_thresholds) for method, (x, y) in ecdf_data.items(): plt.plot(x, y, label=method) plt.xlabel("Number of function evaluations divided by the number of dimensions.") plt.xscale("log") plt.ylabel("ECDF point pairs") plt.legend() plt.grid(True) if function_name: plt.title(f"ECDF curves for function {function_name}") else: plt.title("ECDF curves") if savepath: plt.savefig(savepath) if show: plt.show()