Diff of /bin/merge_h5.py [000000] .. [d01132]

Switch to side-by-side view

--- a
+++ b/bin/merge_h5.py
@@ -0,0 +1,69 @@
+"""
+Code for merging two h5 objects
+"""
+
+import os
+import sys
+import argparse
+import logging
+
+import anndata as ad
+import scanpy as sc
+
+SRC_DIR = os.path.join(
+    os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "babel"
+)
+assert os.path.isdir(SRC_DIR)
+sys.path.append(SRC_DIR)
+import sc_data_loaders
+import adata_utils
+import utils
+
+
+def build_parser():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("input1", type=str)
+    parser.add_argument("input2", type=str)
+    parser.add_argument("output", type=str)
+    parser.add_argument(
+        "--mode",
+        "-m",
+        type=str,
+        default="ATAC",
+        choices=["ATAC", "RNA"],
+        help="Merging RNA or ATAC data",
+    )
+    return parser
+
+
+def main():
+    parser = build_parser()
+    args = parser.parse_args()
+
+    if args.mode == "RNA":
+        raise NotImplemented
+    else:
+        x = utils.sc_read_10x_h5_ft_type(args.input1, ft_type="Peaks")
+        y = utils.sc_read_10x_h5_ft_type(args.input2, ft_type="Peaks")
+
+        atac_bins = sc_data_loaders.harmonize_atac_intervals(x.var_names, y.var_names)
+        logging.info(f"Harmonized to {len(atac_bins)} intervals")
+
+        x_repool = sc_data_loaders.repool_atac_bins(x, atac_bins)
+        y_repool = sc_data_loaders.repool_atac_bins(y, atac_bins)
+
+        result = x_repool.concatenate(y_repool)
+
+    logging.info(f"Shape after concat: {result.shape}")
+    if "." in os.path.basename(args.output):
+        _root, ext = os.path.splitext(args.output)
+        if ext == ".h5ad":
+            result.write_h5ad(args.output)
+        else:
+            raise NotImplementedError(f"Unrecognized file extension: {ext}")
+    else:
+        adata_utils.write_adata_as_10x_dir(result, args.output)
+
+
+if __name__ == "__main__":
+    main()