# main_intergration_rna_atac.py
#
# End-to-end driver script: loads the paired RNA/ATAC AnnData objects from the
# .h5ad file referenced below, converts them into training inputs, builds the
# moETM encoders/decoder/optimizer, and runs training with evaluation.

import gc
import os

# Pin the visible GPU *before* importing the project modules. CUDA reads
# CUDA_VISIBLE_DEVICES once, when it is initialised; if any imported module
# touches CUDA at import time, assigning the variable afterwards has no effect.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import warnings

import pandas as pd

from moETM.train import Trainer_moETM, Train_moETM
from dataloader import load_nips_rna_atac_dataset, prepare_nips_dataset, data_process_moETM
from moETM.build_model import build_moETM

# Silence all warnings raised from this point on (import-time warnings above
# are still shown, as in the original statement order).
warnings.filterwarnings('ignore')

# Load dataset
mod_file_path = "./data/GSE194122_openproblems_neurips2021_multiome_BMMC_processed.h5ad"
gene_encoding = pd.read_csv('./useful_file/gene_coding_nips_rna_atac.csv')

adata_mod1, adata_mod2 = load_nips_rna_atac_dataset(mod_file_path, gene_encoding)
# Reclaim memory held by temporaries created while loading.
gc.collect()

# Prepare dataset
adata_mod1, adata_mod2 = prepare_nips_dataset(adata_mod1, adata_mod2)

# Evaluation parameters (passed through unchanged to Train_moETM).
Eval_kwargs = {
    'batch_col': 'batch_indices',
    'plot_fname': 'moETM_delta',
    'cell_type_col': 'cell_type',
    'clustering_method': 'louvain',
    'resolutions': [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,2],
    # NOTE(review): confirm whether Train_moETM creates this directory itself
    # or expects it to exist already.
    'plot_dir': './result_fig',
}

# Number of rows in the first modality; not referenced again in this script.
n_total_sample = adata_mod1.shape[0]

X_mod1_train_T, X_mod2_train_T, batch_index_train_T, train_adata_mod1 = data_process_moETM(adata_mod1, adata_mod2)

# Model dimensions are derived from the processed training inputs.
num_batch = len(batch_index_train_T.unique())
input_dim_mod1 = X_mod1_train_T.shape[1]
input_dim_mod2 = X_mod2_train_T.shape[1]
train_num = X_mod1_train_T.shape[0]

num_topic = 100
emd_dim = 400
encoder_mod1, encoder_mod2, decoder, optimizer = build_moETM(input_dim_mod1, input_dim_mod2, num_batch, num_topic=num_topic, emd_dim=emd_dim)

trainer = Trainer_moETM(encoder_mod1, encoder_mod2, decoder, optimizer)

Total_epoch = 500
batch_size = 2000
Train_set = [X_mod1_train_T, X_mod2_train_T, batch_index_train_T]
# The evaluation set reuses the training tensors and adds the matching AnnData
# object; no held-out split is made in this script.
Test_set = [X_mod1_train_T, X_mod2_train_T, batch_index_train_T, train_adata_mod1]
Train_moETM(trainer, Total_epoch, train_num, batch_size, Train_set, Test_set, Eval_kwargs)