# Source code for xplogger.experiment_manager.notebook.utils

# type: ignore
"""Utility functions to make it easier to use xplogger with a Jupyter notebook."""

from __future__ import annotations

import itertools
from typing import Any

import numpy as np
import pandas as pd
from omegaconf import DictConfig, OmegaConf

from xplogger.experiment_manager.record.record_list import RecordList
from xplogger.parser.experiment.experiment import (
    ExperimentSequence,
    ExperimentSequenceDict,
)
from xplogger.types import ValueType


def prettyprint_dict(d: dict, sep: str = "\t", indent: int = 0) -> None:
    r"""Pretty print a (possibly nested) dictionary.

    Each key is printed on its own line, prefixed by ``sep`` repeated
    ``indent`` times. A value that is itself a dictionary is printed
    recursively one level deeper; any other value is printed on the next
    line, one level deeper than its key.

    Args:
        d (dict): input dictionary
        sep (str, optional): Separator to use. Defaults to "\t".
        indent (int, optional): Indentation to use. Defaults to 0.
    """
    for key, value in d.items():
        print(sep * indent + str(key))
        if isinstance(value, dict):
            # Forward ``sep`` so nested levels keep the caller's separator
            # instead of silently falling back to the default tab.
            prettyprint_dict(d=value, sep=sep, indent=indent + 1)
        else:
            print(sep * (indent + 1) + str(value))
def get_mean_and_std_err(
    experiment_sequence: ExperimentSequence, metadata: DictConfig
) -> tuple[np.typing.NDArray[np.float32], np.typing.NDArray[np.float32], int]:
    """Compute the mean and standard error of one metric over an experiment sequence.

    The metric named by ``metadata.metric_name`` is aggregated through
    ``experiment_sequence.aggregate_metrics`` and then reduced along axis 0.

    Args:
        experiment_sequence (ExperimentSequence): sequence whose metrics are
            aggregated.
        metadata (DictConfig): metadata to use for computing the metrics. The
            fields read here are ``metric_name``, ``x.name``, ``x.min``,
            ``x.max`` and ``mode``.

    Returns:
        tuple[np.typing.NDArray[np.float32], np.typing.NDArray[np.float32], int]:
        tuple of mean, standard error and number of experiments in the
        experiment sequence (useful for computing standard deviation etc).
    """
    metric_name = metadata.metric_name
    x_axis = metadata.x
    per_run_values = experiment_sequence.aggregate_metrics(
        metric_names=[metric_name],
        x_name=x_axis.name,
        x_min=x_axis.min,
        x_max=x_axis.max,
        mode=metadata.mode,
        drop_duplicates=True,
        verbose=True,
        dropna=True,
    )[metric_name]

    mean = np.mean(per_run_values, axis=0)
    # Size of axis 0; reported to the caller as the number of experiments.
    num_runs = per_run_values.shape[0]
    # Standard error = standard deviation along axis 0 / sqrt(size of axis 0).
    std_err = np.std(per_run_values, axis=0) / np.sqrt(num_runs)
    return mean, std_err, num_runs
def make_df(  # noqa: C901
    metadata: DictConfig,
    step_metadata: DictConfig,
    groups: dict[Any, RecordList],
    hyperparams: dict[str, set[ValueType]],
    exp_seq_dict: ExperimentSequenceDict,
) -> pd.DataFrame:
    """Make a dataframe using the given experiment sequence dict.

    Every combination in the Cartesian product of the ``hyperparams`` values
    that is also a key of ``groups`` contributes one row. A combination whose
    metrics cannot be computed is reported with ``print`` and skipped
    entirely, so all columns always keep the same length.

    Args:
        metadata (DictConfig): Contains information like metric_name for the
            metrics of interest.
        step_metadata (DictConfig): Contains information like metric_name for
            the step metric (eg epoch or frames)
        groups (dict[Any, RecordList]): Only hyperparameter combinations that
            are keys of this mapping are processed.
        hyperparams (dict[str, set[ValueType]]): Maps each hyperparameter name
            to the values whose combinations are enumerated.
        exp_seq_dict (ExperimentSequenceDict): Indexed by hyperparameter
            combination to obtain the experiment sequence to summarise.

    Returns:
        pd.DataFrame: The "converged" summary, with one column per
        hyperparameter followed by ``mean_<metric>``, ``stderr_<metric>``,
        ``steps`` and ``seeds``. The mean is the maximum of the mean curve,
        the standard error is taken at the position of that maximum, and
        ``steps`` is the maximum of the mean step curve.
    """
    metric_name = metadata.metric_name
    mean_key = f"mean_{metric_name}"
    stderr_key = f"stderr_{metric_name}"
    modes = ("aggregated", "converged")
    columns = [*hyperparams, mean_key, stderr_key, "steps", "seeds"]
    # NOTE: the "aggregated" summary (last point of each curve) is built
    # alongside "converged", but only "converged" is returned.
    results: dict[str, Any] = {
        mode: {column: [] for column in columns} for mode in modes
    }

    candidates = (
        OmegaConf.create(dict(zip(hyperparams, values)))
        for values in itertools.product(*hyperparams.values())
    )
    valid_params = [params for params in candidates if params in groups]

    for combination in valid_params:
        hyperparam_values = {key: combination[key] for key in hyperparams}
        try:
            mean, std_err, num_seeds = get_mean_and_std_err(
                exp_seq_dict[combination], metadata
            )
            mean_steps, _, _ = get_mean_and_std_err(
                exp_seq_dict[combination], step_metadata
            )
            # Compute the whole row before touching ``results``: a failure
            # part-way through must not leave columns of different lengths.
            converged: dict[str, Any] = {}
            aggregated: dict[str, Any] = {}
            if mean is None or std_err is None:
                converged[stderr_key] = None
                converged[mean_key] = None
                aggregated[stderr_key] = None
                aggregated[mean_key] = None
            else:
                converged[stderr_key] = std_err[np.argmax(mean)]
                converged[mean_key] = np.max(mean)
                aggregated[stderr_key] = std_err[-1]
                aggregated[mean_key] = mean[-1]
            if mean_steps is None:
                converged["steps"] = None
                aggregated["steps"] = None
            else:
                converged["steps"] = np.max(mean_steps)
                aggregated["steps"] = mean_steps[-1]
        except Exception as e:
            # Deliberate best-effort: report the failure and drop this
            # combination instead of aborting the whole table.
            print(e)
            continue

        for mode, row in (("aggregated", aggregated), ("converged", converged)):
            for key, value in hyperparam_values.items():
                results[mode][key].append(value)
            for key, value in row.items():
                results[mode][key].append(value)
            results[mode]["seeds"].append(num_seeds)

    return pd.DataFrame.from_dict(results["converged"])
# import matplotlib.pyplot as plt
# from xplogger.experiment_manager.result import Result
#
# def plot_result(result: Result, mode: str, metric: str) -> None:
#     info = result.info
#     steps = (
#         result.metrics[mode]["steps"] * result.info[mode]["x"]["alpha"]
#         + result.info[mode]["x"]["beta"]
#     )
#     y_mean = (
#         result.metrics[mode][f"{metric}_mean"] * result.info[mode]["y"]["alpha"]
#         + result.info[mode]["y"]["beta"]
#     )
#     y_stderr = result.metrics[mode][f"{metric}_stderr"]
#     plt.plot(steps, y_mean, label=result.label)
#     plt.fill_between(steps, y_mean - y_stderr, y_mean + y_stderr, alpha=0.1)