moETM / Git / [e72cf6] /downstream_analysis/plot_sample_by

Models:
AlyssaS/
moETM
Downloads: 1
[e72cf6]: / downstream_analysis / plot_sample_by_topic.R
History
Download this file
183 lines (153 with data), 7.7 kB

## this script is used to plot sample by topic matrix

source('./utilities.R')

args <- commandArgs(trailingOnly=TRUE)
if (length(args) != 8) {
  print(length(args))
  stop(" Usage: plot_sample_by_topic.R <num_topic> <version> <type> <topic_select> <is_plot> <is_diff_topic> <is_all>", call.=FALSE)
}

num_topic=as.numeric(args[1])
version=args[2]
type=args[3]#rna_protein or rna_atac
topic_select=as.numeric(strsplit(args[4],',')[[1]])
if (is.na(topic_select)){topic_select=c()}
is_plot=args[5]=='TRUE'
is_diff_topic=args[6]=='TRUE'
is_all=args[7]=='TRUE'#cutoff=0, plot all topics


################## cell by topic matrix
## read data
cell_topic_raw=read.csv(paste0('../Result/',version,'/topic_proportion_',type,'_before_sm.csv'),header=T)

cell_info=cell_topic_raw[,102:dim(cell_topic_raw)[2]]#cell information
cell_topic=cell_topic_raw[,1:101]
colnames(cell_topic)=c('sample',1:num_topic)

###### plot
if (is_plot==T){
  #for visualiztion, subsample 
  #rows: subsample 10000
  #columns: select topics with sum(abs) > round 3st quantile
  sub_num=10000#subsample 10000 cells
  sub_ind=sample(1:dim(cell_topic)[1],sub_num,rep=F)
  
  col_sum=colSums(abs(cell_topic[,2:(num_topic+1)]))
  if (is_all){#plot all topics
    cutoff=0
  } else{#around 3/4q of col_sum
    cutoff=round(as.numeric(summary(col_sum)["3rd Qu."]))
  }
  cell_topic_select=cell_topic[sub_ind,2:(num_topic+1)][,col_sum>cutoff]
  
  #add manually selected topics
  if (length(topic_select)>0){
    old_col=colnames(cell_topic_select)
    add_col=c()
    for (i in topic_select){
      if (!(i %in% colnames(cell_topic_select))){
        cell_topic_select=cbind(cell_topic_select,cell_topic[sub_ind,i+1])#cell_topic's 1st row is sample id
        add_col=c(add_col,i)
      }
    }
    colnames(cell_topic_select)=c(old_col,add_col)
  }
  num_select_topic=dim(cell_topic_select)[2]

  ### add annotation color
  ## for covid
  manualcolors<-c('forestgreen', 'orange', 'cornflowerblue', 'darkolivegreen4', 'indianred1', 'tan4', 
                  'mediumorchid1',  'yellowgreen', 'lightsalmon', 'tan3', "tan1", 'wheat4', 
                  '#DDAD4B','chartreuse', 'moccasin', 'mediumvioletred', 'seagreen','cadetblue1',
                  "darkolivegreen1" , "tomato3" , "#7CE3D8", 'tan2',"#F39B7FFF","#91D1C2FF",
                  'red2', 'darkblue')
  if (strsplit(version,'/')[[1]][1]=='covid'){
    cell_topic_select$initial_clustering=cell_topic_raw$initial_clustering[sub_ind]
    cell_topic_select$Status_on_day_collection_summary=cell_topic_raw$Status_on_day_collection_summary[sub_ind]
    cell_topic_select$Status=cell_topic_raw$Status[sub_ind]
    
    ## color
    col_type=manualcolors[1:18]#manual checked number
    names(col_type)=unique(cell_topic_select$initial_clustering)
    
    col_severity=manualcolors[19:24]
    names(col_severity)=unique(cell_topic_select$Status_on_day_collection_summary)

    col_status=manualcolors[25:26]
    names(col_status)=unique(cell_topic_select$Status)
    
    coll = list(cellType = col_type,status = col_status,severity=col_severity)
    plot_cell_topic=cell_topic_select[order(cell_topic_select$initial_clustering),]
    
  } else if (strsplit(version,'/')[[1]][1]=='rna_atac'){
    #nips atac
    cell_topic_select$cellType=cell_info$cell_type[sub_ind]
    col_type=manualcolors[1:22]
    names(col_type)=unique(cell_info$cell_type)
    coll=list(cellType = col_type)
    plot_cell_topic=cell_topic_select[order(cell_topic_select$cellType),]
  
  } else{#nips adt
    ## broad cell type
    cell_type0=read.csv('../useful_file/cell_type_nips_color.csv',header=T)
    cell_info_new=c()
    for (i in cell_info$cell_type){
      new=cell_type0[which(cell_type0$cellType2==i),]$cellType1
      cell_info_new=c(cell_info_new,new)
    }
    
    cell_topic_select$cellType1=cell_info_new[sub_ind]
    cell_topic_select$cellType2=cell_info$cell_type[sub_ind]
    cell_topic_select_order=cell_topic_select[with(cell_topic_select,order(cellType1,cellType2)),]
    
    #color
    col_type1=cell_type0$color1
    names(col_type1)=cell_type0$cellType1
    col_type2=cell_type0$color2
    names(col_type2)=cell_type0$cellType2
    
    coll = list(cellType1 = col_type1, cellType2 = col_type2)
    plot_cell_topic=cell_topic_select[order(cell_topic_select$cellType1),]
  }
  
  save_name=paste0('../plot/',version,'/sample_topic_cutoff',cutoff,'.png')
  plot_sample_by_topic(plot_cell_topic,version,coll,num_select_topic,save_name,is_cluster=T)
  
  ##plot legend
  if (strsplit(version,'/')[[1]][1] !='rna_adt'){#not nips adt (2 cell type annotation)
    for (i in 1:length(coll)){
      labels=names(coll[[i]])
      col_fill=coll[[i]]
      lgd=Legend(labels=labels,legend_gp=gpar(fill =col_fill))
      png(paste0('../plot/',version,'/',i,'legend.png'),
          height=2000,width=1500,res=500)
      draw(lgd)
      dev.off()
    }
  } else{#nips adt
      u=unique(cell_type0$cellType1)
      u_col=unique(cell_type0$color1)
      for (i in 1:length(u)){
        cell_type0_sub=cell_type0[cell_type0$cellType1==u[i],]
      
        labels=c(u[i],unique(cell_type0_sub$cellType2))
        col_fill=c(u_col[i],unique(cell_type0_sub$color2))
      
        lgd=Legend(labels=labels,legend_gp=gpar(fill =col_fill))
        png(paste0('../plot/',version,'/',i,'legend.png'),
            height=1000,width=1500,res=500)
        draw(lgd)
        dev.off()
     }
    }
}

####### test differentially expressed topics
if (is_diff_topic==T){
  if (version=='covid'){
  ####### differentially expressed topics, remove first column which is the topic index
    ### test cell types
    topic_initial_list=diff_topic('initial_clustering',cell_info,cell_topic[,-1],num_topic)
    topic_full_list=diff_topic('full_clustering',cell_info,cell_topic[,-1],num_topic)
    ### test COVID
    topic_covid_list=diff_topic('Status',cell_info,cell_topic[,-1],num_topic)
    ### test severity
    topic_severity_list=diff_topic('Status_on_day_collection_summary',cell_info,cell_topic[,-1],num_topic)
    ### test confounder
    topic_sex_list=diff_topic('Sex',cell_info,cell_topic[,-1],num_topic,alter='two.sided')
    topic_age_list=diff_topic('Age_interval',cell_info,cell_topic[,-1],num_topic,alter='two.sided')
    topic_smoker_list=diff_topic('Smoker',cell_info,cell_topic[,-1],num_topic,alter='two.sided')
    
    ### plot differentially expressed topics
    plot_diff_topic(topic_initial_list,width=1000,topic_select=topic_select,save_name=paste0('../plot/',version,'/diff_topic_initial.png'))
    plot_diff_topic(topic_full_list,width=1000,topic_select=topic_select,save_name=paste0('.../plot/',version,'/diff_topic_full.png'))
    
    plot_diff_topic(topic_covid_list,width=200,topic_select=topic_select,paste0('../plot/',version,'/diff_topic_covid.png'))
    plot_diff_topic(topic_severity_list,width=500,topic_select=topic_select,paste0('../plot/',version,'/diff_topic_severity_sameRange.png'))
    ## confounder
    plot_diff_topic(topic_sex_list,width=250,topic_select=topic_select,paste0('../plot/',version,'/diff_topic_sex_sameRange.png'))
    plot_diff_topic(topic_age_list,width=500,topic_select=topic_select,paste0('../plot/',version,'/diff_topic_age_sameRange.png'))
    plot_diff_topic(topic_smoker_list,width=300,topic_select=topic_select,paste0('../plot/',version,'/diff_topic_smoker_sameRange.png'))
    # 
  } else{#nips, only check cell type1
    topic_cell_list=diff_topic('cell_type',cell_info,cell_topic[,-1],num_topic)
    write.csv(topic_cell_list$topic_label_p,file=paste0('..s/plot/',version,'/topic_cell_p.csv'),quote=F,row.names=F)
    plot_diff_topic(topic_cell_list,width=1200,topic_select=topic_select,save_name=paste0('../plot/',version,'/diff_topic_cell.png'))
  }
  
}