Diff of /bin/merge_h5.py [000000] .. [d01132]

Switch to unified view

a b/bin/merge_h5.py
1
"""
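Merge two 10x-style h5 files into a single dataset.

Only ATAC (peak) data is currently handled: the peaks of both inputs are
harmonized to a shared set of intervals before the cells are concatenated.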
"""
4
5
import os
6
import sys
7
import argparse
8
import logging
9
10
import anndata as ad
11
import scanpy as sc
12
13
# Directory expected to hold the project's library modules: the "babel" folder
# located one level above the directory containing this script.
SRC_DIR = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "babel"
)
# Validate explicitly instead of using `assert`, which is stripped when Python
# runs with -O and would let a missing directory surface later as an obscure
# import error.
if not os.path.isdir(SRC_DIR):
    raise FileNotFoundError(f"Expected source directory not found: {SRC_DIR}")
# Make the modules living in SRC_DIR importable by plain `import` statements.
sys.path.append(SRC_DIR)
18
import sc_data_loaders
19
import adata_utils
20
import utils
21
22
23
def build_parser():
    """Build the command-line argument parser for the merge script.

    Returns:
        argparse.ArgumentParser: parser accepting three positional paths
        (``input1``, ``input2``, ``output``) and an optional ``--mode``/``-m``
        flag restricted to ``"ATAC"`` (default) or ``"RNA"``.
    """
    parser = argparse.ArgumentParser(
        description="Merge two h5 files into a single output.",
    )
    parser.add_argument("input1", type=str, help="First input h5 file")
    parser.add_argument("input2", type=str, help="Second input h5 file")
    parser.add_argument(
        "output",
        type=str,
        help="Output path: a .h5ad file, or an extension-less directory name",
    )
    # Both modes are accepted here; whether a mode is actually supported is
    # decided by the code that consumes the parsed arguments.
    parser.add_argument(
        "--mode",
        "-m",
        type=str,
        default="ATAC",
        choices=["ATAC", "RNA"],
        help="Merging RNA or ATAC data",
    )
    return parser
37
38
39
def main():
    """Merge the two inputs named on the command line and write the result.

    In ATAC mode, both inputs are read with
    ``utils.sc_read_10x_h5_ft_type(..., ft_type="Peaks")``, their variable
    names are harmonized into a shared set of intervals, each input is
    repooled onto those intervals, and the two results are concatenated.

    The output format is chosen from ``args.output``: a ``.h5ad`` extension
    writes an h5ad file; a basename without any ``.`` is handed to
    ``adata_utils.write_adata_as_10x_dir``.

    Raises:
        NotImplementedError: if ``--mode RNA`` is requested, or if the output
            path carries an extension other than ``.h5ad``.
    """
    parser = build_parser()
    args = parser.parse_args()

    if args.mode == "RNA":
        # `NotImplemented` is the binary-operator sentinel, not an exception;
        # raising it produces a TypeError. NotImplementedError is what is meant.
        raise NotImplementedError("Merging RNA data is not implemented")

    x = utils.sc_read_10x_h5_ft_type(args.input1, ft_type="Peaks")
    y = utils.sc_read_10x_h5_ft_type(args.input2, ft_type="Peaks")

    # Derive one common interval set from both inputs so that they share the
    # same variables before being concatenated.
    atac_bins = sc_data_loaders.harmonize_atac_intervals(x.var_names, y.var_names)
    logging.info(f"Harmonized to {len(atac_bins)} intervals")

    x_repool = sc_data_loaders.repool_atac_bins(x, atac_bins)
    y_repool = sc_data_loaders.repool_atac_bins(y, atac_bins)

    result = x_repool.concatenate(y_repool)

    logging.info(f"Shape after concat: {result.shape}")
    if "." in os.path.basename(args.output):
        ext = os.path.splitext(args.output)[1]
        if ext == ".h5ad":
            result.write_h5ad(args.output)
        else:
            raise NotImplementedError(f"Unrecognized file extension: {ext}")
    else:
        # No extension at all: treat the output path as a directory name.
        adata_utils.write_adata_as_10x_dir(result, args.output)
66
67
68
# Run the merge only when executed as a script, not when imported.
if __name__ == "__main__":
    main()