deepmodeloptim / Git / Diff of /bin/tuning.py

Models:
MarcoTheBlack/
deepmodeloptim
Downloads: 1
Diff of /bin/tuning.py [000000] .. [13a70a]
Switch to side-by-side view

--- a
+++ b/bin/tuning.py
@@ -0,0 +1,247 @@
+#!/usr/bin/env python3
+"""CLI module for running raytune tuning experiment."""
+
+import argparse
+import logging
+import shutil
+from pathlib import Path
+from typing import Any
+
+import ray
+import yaml
+
+from stimulus.data import loaders
+from stimulus.learner import raytune_learner, raytune_parser
+from stimulus.utils import launch_utils, yaml_data, yaml_model_schema
+
+logger = logging.getLogger(__name__)
+
+
+def _raise_empty_grid() -> None:
+    """Raise an error when grid results are empty."""
+    raise RuntimeError("Ray Tune returned empty results grid")
+
+
+def get_args() -> argparse.Namespace:
+    """Get the arguments when using from the commandline.
+
+    Returns:
+        Parsed command line arguments.
+    """
+    parser = argparse.ArgumentParser(description="Launch check_model.")
+    parser.add_argument("-d", "--data", type=str, required=True, metavar="FILE", help="Path to input csv file.")
+    parser.add_argument("-m", "--model", type=str, required=True, metavar="FILE", help="Path to model file.")
+    parser.add_argument(
+        "-e",
+        "--data_config",
+        type=str,
+        required=True,
+        metavar="FILE",
+        help="Path to data config file.",
+    )
+    parser.add_argument(
+        "-c",
+        "--model_config",
+        type=str,
+        required=True,
+        metavar="FILE",
+        help="Path to yaml config training file.",
+    )
+    parser.add_argument(
+        "-w",
+        "--initial_weights",
+        type=str,
+        required=False,
+        nargs="?",
+        const=None,
+        default=None,
+        metavar="FILE",
+        help="The path to the initial weights (optional).",
+    )
+    parser.add_argument(
+        "--ray_results_dirpath",
+        type=str,
+        required=False,
+        nargs="?",
+        const=None,
+        default=None,
+        metavar="DIR_PATH",
+        help="Location where ray_results output dir should be written. If None, uses ~/ray_results.",
+    )
+    parser.add_argument(
+        "-o",
+        "--output",
+        type=str,
+        required=False,
+        nargs="?",
+        const="best_model.pt",
+        default="best_model.pt",
+        metavar="FILE",
+        help="The output file path to write the trained model to",
+    )
+    parser.add_argument(
+        "-bm",
+        "--best_metrics",
+        type=str,
+        required=False,
+        nargs="?",
+        const="best_metrics.csv",
+        default="best_metrics.csv",
+        metavar="FILE",
+        help="The path to write the best metrics to",
+    )
+    parser.add_argument(
+        "-bc",
+        "--best_config",
+        type=str,
+        required=False,
+        nargs="?",
+        const="best_config.yaml",
+        default="best_config.yaml",
+        metavar="FILE",
+        help="The path to write the best config to",
+    )
+    parser.add_argument(
+        "-bo",
+        "--best_optimizer",
+        type=str,
+        required=False,
+        nargs="?",
+        const="best_optimizer.pt",
+        default="best_optimizer.pt",
+        metavar="FILE",
+        help="The path to write the best optimizer to",
+    )
+    parser.add_argument(
+        "--tune_run_name",
+        type=str,
+        required=False,
+        nargs="?",
+        const=None,
+        default=None,
+        metavar="CUSTOM_RUN_NAME",
+        help=(
+            "Tells ray tune what the 'experiment_name' (i.e. the given tune_run name) should be. "
+            "If set, the subdirectory of ray_results is named with this value and its train dir is prefixed accordingly. "
+            "Default None means that ray will generate such a name on its own."
+        ),
+    )
+    parser.add_argument(
+        "--debug_mode",
+        action="store_true",
+        help="Activate debug mode for tuning. Default false, no debug.",
+    )
+    return parser.parse_args()
+
+
+def main(
+    model_path: str,
+    data_path: str,
+    data_config_path: str,
+    model_config_path: str,
+    initial_weights: str | None = None,  # noqa: ARG001
+    ray_results_dirpath: str | None = None,
+    output_path: str | None = None,
+    best_optimizer_path: str | None = None,
+    best_metrics_path: str | None = None,
+    best_config_path: str | None = None,
+    *,
+    debug_mode: bool = False,
+) -> None:
+    """Run the main model checking pipeline.
+
+    Args:
+        data_path: Path to input data file.
+        model_path: Path to model file.
+        data_config_path: Path to data config file.
+        model_config_path: Path to model config file.
+        initial_weights: Optional path to initial weights.
+        ray_results_dirpath: Directory for ray results.
+        debug_mode: Whether to run in debug mode.
+        output_path: Path to write the best model to.
+        best_optimizer_path: Path to write the best optimizer to.
+        best_metrics_path: Path to write the best metrics to.
+        best_config_path: Path to write the best config to.
+    """
+    # Convert data config to proper type
+    with open(data_config_path) as file:
+        data_config_dict: dict[str, Any] = yaml.safe_load(file)
+    data_config: yaml_data.YamlSubConfigDict = yaml_data.YamlSubConfigDict(**data_config_dict)
+
+    with open(model_config_path) as file:
+        model_config_dict: dict[str, Any] = yaml.safe_load(file)
+    model_config: yaml_model_schema.Model = yaml_model_schema.Model(**model_config_dict)
+
+    encoder_loader = loaders.EncoderLoader()
+    encoder_loader.initialize_column_encoders_from_config(column_config=data_config.columns)
+
+    model_class = launch_utils.import_class_from_file(model_path)
+
+    ray_config_loader = yaml_model_schema.YamlRayConfigLoader(model=model_config)
+    ray_config_model = ray_config_loader.get_config()
+
+    tuner = raytune_learner.TuneWrapper(
+        model_config=ray_config_model,
+        data_config_path=data_config_path,
+        model_class=model_class,
+        data_path=data_path,
+        encoder_loader=encoder_loader,
+        seed=42,
+        ray_results_dir=ray_results_dirpath,
+        debug=debug_mode,
+    )
+
+    # Ensure output_path is provided
+    if output_path is None:
+        raise ValueError("output_path must not be None")
+    try:
+        grid_results = tuner.tune()
+        if not grid_results:
+            _raise_empty_grid()
+
+        # Initialize parser with results
+        parser = raytune_parser.TuneParser(result=grid_results)
+
+        # Ensure output directory exists
+        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
+
+        # Save outputs using proper Result object API
+        parser.save_best_model(output=output_path)
+        parser.save_best_optimizer(output=best_optimizer_path)
+        parser.save_best_metrics_dataframe(output=best_metrics_path)
+        parser.save_best_config(output=best_config_path)
+
+    except RuntimeError:
+        logger.exception("Tuning failed")
+        raise
+    except KeyError:
+        logger.exception("Missing expected result key")
+        raise
+    finally:
+        if debug_mode:
+            logger.info("Debug mode - preserving Ray results directory")
+        #elif ray_results_dirpath:
+        #    shutil.rmtree(ray_results_dirpath, ignore_errors=True)
+
+
+def run() -> None:
+    """Run the model checking script."""
+    args = get_args()
+    main(
+        data_path=args.data,
+        model_path=args.model,
+        data_config_path=args.data_config,
+        model_config_path=args.model_config,
+        initial_weights=args.initial_weights,
+        ray_results_dirpath=args.ray_results_dirpath,
+        output_path=args.output,
+        best_optimizer_path=args.best_optimizer,
+        best_metrics_path=args.best_metrics,
+        best_config_path=args.best_config,
+        debug_mode=args.debug_mode,
+    )
+
+
+if __name__ == "__main__":
+    ray.init(address="auto", ignore_reinit_error=True)
+    run()