Source code for ml_logger.logbook

"""Implementation of the LogBook class.

LogBook class provides an interface to persist the logs on the filesystem,
tensorboard, remote backends, etc.

"""

import importlib
import time
from copy import deepcopy
from typing import Any, List, Optional

from ml_logger.logger.base import Logger as LoggerType
from ml_logger.types import ConfigType, KeyMapType, LogType, MetricType


class LogBook:
    """Interface to persist logs on the filesystem, tensorboard, remote backends, etc.

    A LogBook owns a list of logger backends. Every log written through it
    is annotated with the logbook id, a timestamp and the log type, and is
    then handed to each backend in turn.
    """

    def __init__(self, config: ConfigType):
        """Initialise the LogBook class.

        Args:
            config (ConfigType): Config to initialise the LogBook class.
                The config must have the following keys:
                id: Id of the current LogBook instance. This
                    attribute is logged with each log and is useful
                    when multiple LogBook instances are needed (for
                    example with multiprocessing)
                name: Name of the logger.
                loggers: Mapping from a logger name to the config of
                    that logger. For every entry, the module
                    `ml_logger.logger.<logger name>` is imported and its
                    `Logger` class is instantiated with the config.
                The config can be created using the make_config
                function defined in ml_logger/logbook.py
        """
        self.id = config["id"]
        self.logger_name = config["name"]
        self.time_format = "%I:%M:%S%p %Z %b %d, %Y"  # 10:21:14PM EST Mar 04, 2020
        self.loggers: List[LoggerType] = []
        for logger_name, logger_config in config["loggers"].items():
            # Each backend lives in its own module and exposes a class
            # called `Logger`.
            logger_module = importlib.import_module(f"ml_logger.logger.{logger_name}")
            self.loggers.append(logger_module.Logger(config=logger_config))

    def _process_log(self, log: LogType, log_type: str) -> LogType:
        """Process the log before writing.

        The log is annotated in place with the `logbook_id`,
        `logbook_timestamp` and `logbook_type` keys.

        Args:
            log (LogType): Log to process
            log_type (str): Type of the log: config, metric, metadata, etc

        Returns:
            LogType: Processed log (the same object that was passed in)
        """
        log["logbook_id"] = self.id
        log["logbook_timestamp"] = time.strftime(self.time_format)
        log["logbook_type"] = log_type
        return log

    def write(self, log: LogType, log_type: str = "metric") -> None:
        """Write log to loggers.

        Args:
            log (LogType): Log to write
            log_type (str, optional): Type of this log. Defaults to "metric".
        """
        # Work on a deep copy so that the caller's log is never modified
        # by the bookkeeping keys added in `_process_log`.
        log = self._process_log(deepcopy(log), log_type)
        for logger in self.loggers:
            logger.write(log=log)

    def write_config(self, config: ConfigType) -> None:
        """Write config to loggers.

        Args:
            config (ConfigType): Config to write.
        """
        self.write(log=config, log_type="config")

    def write_metric(self, metric: MetricType) -> None:
        """Write metric to loggers.

        Args:
            metric (MetricType): Metric to write
        """
        self.write(log=metric, log_type="metric")

    def write_message(self, message: Any, log_type: str = "info") -> None:
        """Write message string to loggers.

        The message is wrapped as `{"message": message}` before writing.

        Args:
            message (Any): Message string to write
            log_type (str, optional): Type of this message (log).
                Defaults to "info".
        """
        self.write(log={"message": message}, log_type=log_type)

    def write_metadata(self, metadata: LogType) -> None:
        """Write metadata to loggers.

        Args:
            metadata (LogType): Metadata to write
        """
        self.write(log=metadata, log_type="metadata")


def make_config(
    id: str = "0",
    name: str = "default_logger",
    write_to_console: bool = True,
    logger_dir: Optional[str] = None,
    filename: Optional[str] = None,
    filename_prefix: str = "",
    create_multiple_log_files: bool = True,
    wandb_config: Optional[ConfigType] = None,
    wandb_key_map: Optional[KeyMapType] = None,
    wandb_prefix_key: Optional[str] = None,
    tensorboard_config: Optional[ConfigType] = None,
    tensorboard_key_map: Optional[KeyMapType] = None,
    tensorboard_prefix_key: Optional[str] = None,
    mlflow_config: Optional[ConfigType] = None,
    mlflow_key_map: Optional[KeyMapType] = None,
    mlflow_prefix_key: Optional[str] = None,
    mongo_config: Optional[ConfigType] = None,
) -> ConfigType:
    """Make the config that can be passed to the LogBook constructor.

    The backend configs passed in (`wandb_config`, `tensorboard_config`,
    `mlflow_config` and `mongo_config`) are shallow-copied; the caller's
    dictionaries are never modified.

    Args:
        id (str, optional): Id of the current LogBook instance. Defaults to "0".
        name (str, optional): Name of the logger. Defaults to "default_logger".
        write_to_console (bool, optional): Should write the logs to console.
            Defaults to True.
        logger_dir (str, optional):  Path where the logs will be
            written. If None is passed, logs are not written to the filesystem.
            LogBook creates the directory, if it does not exist. Defaults
            to None.
        filename (str, optional):  Name to assign to the log file (eg
            log.jsonl). If None is passed, this argument is ignored. If
            the value is set, `filename_prefix` and `create_multiple_log_files`
            arguments are ignored. Defaults to None.
        filename_prefix (str): String to prefix before the name of the log
            files. Eg if filename_prefix is "dummy", name of log files are
            dummymetric.jsonl, dummylog.jsonl etc. This argument is ignored
            if `filename` is set. Defaults to "".
        create_multiple_log_files (bool, optional): Should multiple log
            files be created - for config, metric, metadata and message
            logs. If True, the files are named as config_log.jsonl,
            metric_log.jsonl etc. If False, only one file log.jsonl is
            created. This argument is ignored if `filename` is set.
            Defaults to True.
        wandb_config (Optional[ConfigType], optional): Config for the wandb
            logger. If None, wandb logger is not created. The config can
            have any parameters that wandb.init() methods accepts
            (https://docs.wandb.com/library/init). Note that the wandb_config
            is passed as keyword arguments to the wandb.init() method.
            This provides a lot of flexibility to the users to configure
            wandb. This also means that the config should not have any
            parameters that wandb.init() would not accept. Defaults to None.
        wandb_key_map (Optional[KeyMapType], optional): When using wandb
            logger for logging metrics, certain keys are required. This
            dictionary provides an easy way to map the keys in the `log`
            (to be written) with the keys that wandb logger needs. For
            instance, wandb logger needs a `step` key in all the metric
            logs. If your logs have a key called `epoch` that you want to
            use as `step`, set `wandb_key_map` as `{epoch: step}`. This
            argument is ignored if set to None. Defaults to None.
        wandb_prefix_key (Optional[str], optional): When a metric is logged
            to wandb, prefix the value (corresponding to the key) to all
            the remaining keys before values are logged in the wandb logger.
            This argument is ignored if set to None. Defaults to None.
        tensorboard_config (Optional[ConfigType], optional): config to
            initialise the tensorboardX logger. The config can have
            any parameters that [tensorboardX.SummaryWriter() method](https://tensorboardx.readthedocs.io/en/latest/tensorboard.html#tensorboardX.SummaryWriter)
            accepts. Note that the config is passed as keyword arguments
            to the tensorboardX.SummaryWriter() method. This provides a lot
            of flexibility to the users to configure tensorboard. This also
            means that config should not have any parameters that
            tensorboardX.SummaryWriter() would not accept. Defaults to None.
        tensorboard_key_map (Optional[KeyMapType], optional): When using
            tensorboard logger for logging metrics, certain keys are required.
            This dictionary provides an easy way to map the keys in the `log`
            (to be written) with the keys that tensorboard logger needs.
            For instance, tensorboard logger needs a `main_tag` key and a
            `global_step` in all the metric logs. If your logs have a key
            called `epoch` that you want to use as `global_step`, and a key
            called `mode` that you want to use as `main_tag`, set
            `tensorboard_key_map` as `{epoch: global_step, mode: main_tag}`.
            This argument is ignored if set to None. Defaults to None.
        tensorboard_prefix_key (Optional[str], optional): When a metric is
            logged to tensorboard, prefix the value (corresponding to the key)
            to all the remaining keys before values are logged in the
            tensorboard logger. This argument is ignored if set to None.
            Defaults to None.
        mlflow_config (Optional[ConfigType], optional): config to
            initialise an mlflow experiment. The config can have
            any parameters that [mlflow.create_experiment() method](https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.create_experiment)
            accepts. Note that the config is passed as keyword arguments
            to the mlflow.create_experiment() method. This provides a lot
            of flexibility to the users to configure mlflow. This also
            means that config should not have any parameters that
            mlflow.create_experiment would not accept. Defaults to None.
        mlflow_key_map (Optional[KeyMapType], optional): When using mlflow
            logger for logging metrics, certain keys are required. This
            dictionary provides an easy way to map the keys in the `log`
            (to be written) with the keys that mlflow logger needs. For
            instance, mlflow logger needs a `step` key in all the metric
            logs. If your logs have a key called `epoch` that you want to
            use as `step`, set `mlflow_key_map` as `{epoch: step}`. This
            argument is ignored if set to None. Defaults to None.
        mlflow_prefix_key (Optional[str], optional): When a metric is logged
            to mlflow, prefix the value (corresponding to the key) to all
            the remaining keys before values are logged in the mlflow logger.
            This argument is ignored if set to None. Defaults to None.
        mongo_config (Optional[ConfigType], optional): config to
            initialise connection to a collection in mongodb. The config
            supports the following keys:
                (1) host: host where mongodb is running.
                (2) port: port on which mongodb is running.
                (3) db: name of the db to use.
                (4) collection: name of the collection to use.
            Defaults to None.

    Returns:
        ConfigType: config to construct the LogBook
    """
    loggers: ConfigType = {}

    if logger_dir is not None:
        loggers["filesystem"] = {
            "logger_dir": logger_dir,
            "logger_name": name,
            "write_to_console": write_to_console,
            "filename": filename,
            "create_multiple_log_files": create_multiple_log_files,
            "filename_prefix": filename_prefix,
            "logbook_key_map": None,
            "logbook_key_prefix": None,
        }

    # Each backend entry below is a fresh dict built from the caller's
    # config, so the `logbook_*` bookkeeping keys are added to the copy
    # instead of leaking into the caller's dictionary.
    if wandb_config is not None:
        loggers["wandb"] = {
            **wandb_config,
            "logbook_key_map": wandb_key_map,
            "logbook_key_prefix": wandb_prefix_key,
        }

    if tensorboard_config is not None:
        loggers["tensorboard"] = {
            **tensorboard_config,
            "logbook_key_map": tensorboard_key_map,
            "logbook_key_prefix": tensorboard_prefix_key,
        }

    if mlflow_config is not None:
        loggers["mlflow"] = {
            **mlflow_config,
            "logbook_key_map": mlflow_key_map,
            "logbook_key_prefix": mlflow_prefix_key,
        }

    if mongo_config is not None:
        loggers["mongo"] = {
            **mongo_config,
            "logbook_key_map": None,
            "logbook_key_prefix": None,
        }

    return {"id": id, "name": name, "loggers": loggers}