# src/move/data/io.py

__all__ = [
    "dump_names",
    "dump_mappings",
    "load_mappings",
    "load_preprocessed_data",
    "read_config",
    "read_names",
    "read_tsv",
]

import json
from pathlib import Path
from typing import Optional

import hydra
import numpy as np
import pandas as pd
from omegaconf import DictConfig

from move import HYDRA_VERSION_BASE, conf
from move.core.typing import BoolArray, FloatArray, ObjectArray, PathLike


def read_config(
    data_config_name: Optional[str], task_config_name: Optional[str], *args
) -> DictConfig:
    """Composes configuration for the MOVE framework.

    Args:
        data_config_name: Name of data configuration file
        task_config_name: Name of task configuration file
        *args: Additional overrides

    Returns:
        Merged configuration
    """
    overrides = []
    if data_config_name is not None:
        overrides.append(f"data={data_config_name}")
    if task_config_name is not None:
        overrides.append(f"task={task_config_name}")
    overrides.extend(args)
    with hydra.initialize_config_module(
        conf.__name__, version_base=HYDRA_VERSION_BASE
    ):
        return hydra.compose("main", overrides=overrides)
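
# Example (a sketch; the config and override names below are hypothetical and
# must match YAML files discoverable by Hydra):
#
#   config = read_config("random_small", "encode_data", "seed=1")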


def load_categorical_dataset(filepath: PathLike) -> FloatArray:
    """Loads categorical data from a NumPy file.

    Args:
        filepath: Path to NumPy file containing a categorical dataset

    Returns:
        NumPy array containing categorical data
    """
    return np.load(filepath).astype(np.float32)


def load_continuous_dataset(filepath: PathLike) -> tuple[FloatArray, BoolArray]:
    """Loads continuous data from a NumPy file and filters out columns
    (features) whose sum is zero. Additionally, encodes NaN values as zeros.

    Args:
        filepath: Path to NumPy file containing a continuous dataset

    Returns:
        Tuple containing (1) the NumPy dataset and (2) a mask marking columns
        (i.e., features) that were not filtered out
    """
    data = np.load(filepath).astype(np.float32)
    data[np.isnan(data)] = 0
    mask_col = np.abs(data).sum(axis=0) != 0
    data = data[:, mask_col]
    return data, mask_col
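
# Example (a sketch; the file below is hypothetical). The returned mask keeps
# feature names aligned with the filtered columns:
#
#   data, mask = load_continuous_dataset("metabolomics.npy")
#   assert data.shape[1] == mask.sum()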


def load_preprocessed_data(
    path: Path,
    categorical_dataset_names: list[str],
    continuous_dataset_names: list[str],
) -> tuple[list[FloatArray], list[list[str]], list[FloatArray], list[list[str]]]:
    """Loads the pre-processed categorical and continuous data.

    Args:
        path: Where the data is saved
        categorical_dataset_names: List of names of the categorical datasets
        continuous_dataset_names: List of names of the continuous datasets

    Returns:
        Two pairs of lists containing (1, 3) the pre-processed data and
        (2, 4) the lists of names of each feature
    """
    categorical_data, categorical_var_names = [], []
    for dataset_name in categorical_dataset_names:
        data = load_categorical_dataset(path / f"{dataset_name}.npy")
        categorical_data.append(data)
        var_names = read_names(path / f"{dataset_name}.txt")
        categorical_var_names.append(var_names)

    continuous_data, continuous_var_names = [], []
    for dataset_name in continuous_dataset_names:
        data, keep = load_continuous_dataset(path / f"{dataset_name}.npy")
        continuous_data.append(data)
        var_names = read_names(path / f"{dataset_name}.txt")
        var_names = [name for i, name in enumerate(var_names) if keep[i]]
        continuous_var_names.append(var_names)

    return (
        categorical_data,
        categorical_var_names,
        continuous_data,
        continuous_var_names,
    )
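
# Example (a sketch; the folder and dataset names are hypothetical). Each
# dataset is expected as a ``<name>.npy``/``<name>.txt`` pair under ``path``:
#
#   cat_data, cat_names, con_data, con_names = load_preprocessed_data(
#       Path("interim_data"), ["diagnosis"], ["metabolomics"]
#   )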


def read_names(path: PathLike) -> list[str]:
    """Reads names from a text file. The text file should have one name per
    line.

    Args:
        path: Path to the text file

    Returns:
        A list of names
    """
    with open(path, "r", encoding="utf-8") as file:
        return [line.strip() for line in file.readlines()]


def read_tsv(
    path: PathLike, sample_names: Optional[list[str]] = None
) -> tuple[ObjectArray, np.ndarray]:
    """Reads a dataset from a TSV file. The TSV is expected to have an index
    column (0th index).

    Args:
        path: Path to TSV
        sample_names: List of sample names used to sort/filter samples

    Returns:
        Tuple containing (1) feature names and (2) 2D matrix (samples x
        features)
    """
    data = pd.read_csv(path, index_col=0, sep="\t")
    if sample_names is not None:
        data.index = data.index.astype(str, copy=False)
        data = data.loc[sample_names]
    return data.columns.values, data.values
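
# Example (a sketch; the file and sample IDs are hypothetical). Passing
# ``sample_names`` reorders rows to match it; missing IDs raise a KeyError:
#
#   feature_names, values = read_tsv("proteomics.tsv", sample_names=["s1", "s2"])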


def load_mappings(path: PathLike) -> dict[str, dict[str, int]]:
    """Loads mappings of category names to integer codes from a JSON file."""
    with open(path, "r", encoding="utf-8") as file:
        return json.load(file)


def dump_mappings(path: PathLike, mappings: dict[str, dict[str, int]]) -> None:
    """Saves mappings of category names to integer codes to a JSON file."""
    with open(path, "w", encoding="utf-8") as file:
        json.dump(mappings, file, indent=4, ensure_ascii=False)


def dump_names(path: PathLike, names: np.ndarray) -> None:
    """Saves names to a text file, one name per line."""
    with open(path, "w", encoding="utf-8") as file:
        file.writelines([f"{name}\n" for name in names])
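
# Example round trip (a sketch; paths and values are hypothetical):
#
#   dump_mappings("mappings.json", {"diagnosis": {"healthy": 0, "sick": 1}})
#   assert load_mappings("mappings.json") == {"diagnosis": {"healthy": 0, "sick": 1}}
#
#   dump_names("samples.txt", np.array(["sample_1", "sample_2"]))
#   assert read_names("samples.txt") == ["sample_1", "sample_2"]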