Download this file

125 lines (90 with data), 4.4 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# Files required (both expected in the "database" directory):
# 1) "ko01000.keg" downloaded from KEGG
# 2) "metacyc_reactions.txt" storing information on the substrates and products of EC numbers
# Output:
# "KO2EC.list.RData", "EC2CMPD.lists.RData" and "KO2CMPD.lists.RData", stored in the "database" directory
library(data.table)
library(dplyr)
## generate KO2EC list #############
# Parse "ko01000.keg" into KO2EC_list: a named list with one element per KO
# identifier (e.g. "K00001"), in order of first appearance, holding the EC
# numbers found in the "[EC:...]" annotation of the lines mentioning that KO.
# The result is saved to "KO2EC.list.RData" inside dbDir.
dbDir <- "database"
fileName <- file.path(dbDir, "ko01000.keg")
# readLines() on a path opens and closes the file itself, so no connection is
# left open when reading fails.
linn <- readLines(fileName)

ko_pattern <- "K\\d{5}"
# "[EC:" followed by anything up to the closing bracket. The same pattern is
# used both to detect and to extract the annotation, so a line with trailing
# text after the bracket can no longer be split wholesale into "EC numbers".
ec_pattern <- "\\[EC:([^]]*)\\]"

KO2EC_list <- vector("list", length = length(linn))
ko_names <- rep(NA_character_, length(linn))
# Maps every KO already seen to the list slot that stores it; this replaces a
# rescan of all names for each input line.
ko_slot <- new.env(hash = TRUE, parent = emptyenv())

for (i in seq_along(linn)) {
  l <- linn[i]
  if (!grepl(ko_pattern, l, perl = TRUE)) next
  # The greedy leading ".*" selects the last KO identifier on the line.
  ko <- sub(".*(K\\d{5}).*", "\\1", l, perl = TRUE)

  if (grepl(ec_pattern, l, perl = TRUE)) {
    ec_field <- sub(paste0(".*", ec_pattern, ".*"), "\\1", l, perl = TRUE)
    ecs <- strsplit(ec_field, " ", fixed = TRUE)[[1]]
  } else {
    # A KO without EC annotation is recorded with a single empty string.
    ecs <- ""
  }

  slot <- ko_slot[[ko]]
  if (!is.null(slot)) {
    # KO seen before: merge the EC numbers and drop empty placeholders.
    merged <- unique(c(KO2EC_list[[slot]], ecs))
    KO2EC_list[[slot]] <- merged[merged != ""]
    next
  }

  KO2EC_list[[i]] <- ecs
  ko_names[i] <- ko
  ko_slot[[ko]] <- i
}

names(KO2EC_list) <- ko_names
# Slots of skipped lines and of repeated KOs were never filled; remove them.
KO2EC_list <- KO2EC_list[!vapply(KO2EC_list, is.null, logical(1))]
save(KO2EC_list, file = file.path(dbDir, "KO2EC.list.RData"))
## generate EC2CMPD lists ##################
# Build three parallel lists with one element per row of
# "metacyc_reactions.txt" that has a non-empty EC-NUMBER; every element is
# named by that row's EC number:
#   EC.Substrates_list - compounds consumed (NA when direction is REVERSIBLE)
#   EC.Products_list   - compounds produced (NA when direction is REVERSIBLE)
#   EC.SubsProd_list   - all compounds of a REVERSIBLE reaction (NA otherwise)
# The lists are saved together in "EC2CMPD.lists.RData" inside dbDir.
metacyc_rxns <- fread(file.path(dbDir, "metacyc_reactions.txt")) %>%
  filter(`EC-NUMBER` != "")
n_rxns <- nrow(metacyc_rxns)

# Strip the "EC-" prefix (with an optional leading pipe) and then an optional
# trailing pipe. A single pattern with a greedy capture group keeps the
# closing pipe inside the captured text, so the two steps are done separately.
ec_names <- sub("\\|?EC-", "", metacyc_rxns$`EC-NUMBER`)
ec_names <- sub("\\|$", "", ec_names)

EC.Substrates_list <- vector("list", length = n_rxns)
EC.Products_list <- vector("list", length = n_rxns)
EC.SubsProd_list <- vector("list", length = n_rxns)

n_unknown <- 0L
for (i in seq_len(n_rxns)) {
  rxn_drct <- metacyc_rxns$`REACTION-DIRECTION`[[i]]
  # A missing direction is handled like a blank one (left-to-right) instead
  # of making the if() below fail on NA.
  if (is.na(rxn_drct)) rxn_drct <- ""

  left <- strsplit(metacyc_rxns$LEFT[i], ";", fixed = TRUE)[[1]]
  right <- strsplit(metacyc_rxns$RIGHT[i], ";", fixed = TRUE)[[1]]

  # Reset on every iteration so nothing leaks over from the previous row.
  substrates <- NA
  products <- NA
  subs.prod <- NA

  if (grepl("LEFT-TO-RIGHT", rxn_drct, fixed = TRUE) || rxn_drct == "") {
    substrates <- left
    products <- right
  } else if (grepl("RIGHT-TO-LEFT", rxn_drct, fixed = TRUE)) {
    substrates <- right
    products <- left
  } else if (rxn_drct == "REVERSIBLE") {
    subs.prod <- unique(c(right, left))
  } else {
    # Unrecognised direction: leave all three entries NA.
    n_unknown <- n_unknown + 1L
  }

  EC.Substrates_list[[i]] <- substrates
  EC.Products_list[[i]] <- products
  EC.SubsProd_list[[i]] <- subs.prod
}

names(EC.Substrates_list) <- ec_names
names(EC.Products_list) <- ec_names
names(EC.SubsProd_list) <- ec_names

if (n_unknown > 0L) {
  warning(
    n_unknown,
    " reaction(s) with an unrecognised REACTION-DIRECTION were stored as NA",
    call. = FALSE
  )
}

save(EC.Substrates_list, EC.Products_list, EC.SubsProd_list,
     file = file.path(dbDir, "EC2CMPD.lists.RData"))
## generate KO.Substrates, KO.Products and KO.SubsProd lists #########
# For every KO in KO2EC_list, pool the compounds of all reactions whose EC
# number is listed for that KO and split them into three disjoint sets:
#   KO.SubsProd_list   - compounds from EC.SubsProd_list plus any compound that
#                        occurs both as a substrate and as a product
#   KO.Substrates_list - substrates that are not in the set above
#   KO.Products_list   - products that are not in the set above
# NA entries are dropped. The lists are saved together in
# "KO2CMPD.lists.RData" inside dbDir.
n_kos <- length(KO2EC_list)
KO.Substrates_list <- vector("list", length = n_kos)
KO.Products_list <- vector("list", length = n_kos)
KO.SubsProd_list <- vector("list", length = n_kos)

# Index the reaction positions by EC number once, instead of scanning all
# names for every EC of every KO.
ec_names <- names(EC.Products_list)
if (is.null(ec_names)) ec_names <- character(0)
rxn_index <- split(seq_along(EC.Products_list), ec_names)

# Pool the compounds stored at positions `hits` of `cmpd_list`, keeping the
# first occurrence of each; always returns a character vector.
pool_compounds <- function(cmpd_list, hits) {
  unique(c(character(0), unlist(cmpd_list[hits], use.names = FALSE)))
}

# seq_len() keeps the loop from running when KO2EC_list is empty.
for (i in seq_len(n_kos)) {
  ecs <- KO2EC_list[[i]]
  # Reaction positions for this KO, ordered by EC and then by position. ECs
  # without any reaction give NA in match() and contribute nothing.
  hits <- unlist(rxn_index[match(ecs, names(rxn_index))], use.names = FALSE)

  products <- pool_compounds(EC.Products_list, hits)
  substrates <- pool_compounds(EC.Substrates_list, hits)
  subs.prod <- pool_compounds(EC.SubsProd_list, hits)

  # Compounds seen on both sides are moved to the substrate-or-product set.
  s.p <- intersect(products, substrates)
  both <- c(subs.prod, s.p)
  subs.prod <- unique(both[!is.na(both)])
  substrates <- substrates[!is.na(substrates) & !(substrates %in% subs.prod)]
  products <- products[!is.na(products) & !(products %in% subs.prod)]

  KO.Substrates_list[[i]] <- substrates
  KO.Products_list[[i]] <- products
  KO.SubsProd_list[[i]] <- subs.prod
}

names(KO.Substrates_list) <- names(KO2EC_list)
names(KO.Products_list) <- names(KO2EC_list)
names(KO.SubsProd_list) <- names(KO2EC_list)

save(KO.SubsProd_list, KO.Products_list, KO.Substrates_list,
     file = file.path(dbDir, "KO2CMPD.lists.RData"))