|
a |
|
b/9-25CpG clusterProfiler.R |
|
|
1 |
setwd("E:/workplace/mywork/methy/dbgap/chf/data_chf_contr/early_chf/c1_UMN/alldata") |
|
|
2 |
#methylation <- read.csv("methylation_probe_info.csv",sep=",") |
|
|
3 |
#save(methylation,file="methylation.Rdata") |
|
|
4 |
load(file="methylation.Rdata") |
|
|
5 |
|
|
|
6 |
load(file="E:/workplace/mywork/methy/dbgap/chf/data_chf_contr/early_chf/c1_UMN/alldata/methylation.Rdata") |
|
|
7 |
setwd("E:\\workplace\\mywork\\methy\\dbgap\\chf\\data_chf_contr\\early_chf\\c1_UMN_JHU\\train_UMN_tset_JHU/1123_dataSummary") |
|
|
8 |
id3 <- read.table("xgblasso_DMP.csv",sep=",",header = T) |
|
|
9 |
head(id3) |
|
|
10 |
id3 = as.character(id3$Feature) |
|
|
11 |
venn_CpG = methylation[methylation$probe.id %in% id3[-c(2,6,17,20,21)],] |
|
|
12 |
setdiff(id3[-c(2,6,17,20,21)],venn_CpG$probe.id) |
|
|
13 |
#"cg06344265" |
|
|
14 |
venn_CpG$probe.id = as.character(venn_CpG$probe.id) |
|
|
15 |
venn_CpG$UCSC_RefGene_Name = as.character(venn_CpG$UCSC_RefGene_Name) |
|
|
16 |
venn_CpG$Relation_to_UCSC_CpG_Island = as.character(venn_CpG$Relation_to_UCSC_CpG_Island) |
|
|
17 |
add_cg = c("cg06344265",NA,NA,NA,NA,NA,NA,NA,NA,37,11,NA,NA,11,NA,NA,NA,NA,NA,NA,"GRIK4",NA,"TSS200",NA,"Open sea",NA,NA,NA,NA,NA,NA,NA) |
|
|
18 |
venn_CpG = rbind(add_cg,venn_CpG) |
|
|
19 |
write.table(venn_CpG,"venn_probe.csv",sep=",") |
|
|
20 |
{ |
|
|
21 |
#all dmp |
|
|
22 |
id3 = read.csv("new_champ/myDMP.txt",sep="\t") |
|
|
23 |
id3 = rownames_to_column(id3,"probe.id")#23581 |
|
|
24 |
id3 <- as.character(id3$probe.id) |
|
|
25 |
venn_CpG = methylation[methylation$probe.id %in% id3,] |
|
|
26 |
setdiff(id3,venn_CpG$probe.id) |
|
|
27 |
|
|
|
28 |
id3 = read.csv("new_champ/myDMP.txt",sep="\t") |
|
|
29 |
id3 = rownames_to_column(id3,"probe.id")#23581 |
|
|
30 |
logFC_cutoff2 <- with( id3, mean( abs( logFC ) ) + 2 * sd( abs( logFC ) ) ) |
|
|
31 |
id3 <- id3[id3$adj.P.Val<0.05 & abs(id3$logFC) > logFC_cutoff2,]#318 |
|
|
32 |
venn_CpG = methylation[methylation$probe.id %in% id3$probe.id,] |
|
|
33 |
setdiff(id3$probe.id,venn_CpG$probe.id) |
|
|
34 |
|
|
|
35 |
} |
|
|
36 |
#RefGene & Island |
|
|
37 |
{ |
|
|
38 |
venn_CpG$UCSC_RefGene_Group = as.character(venn_CpG$UCSC_RefGene_Group) |
|
|
39 |
venn_CpG$UCSC_RefGene_Name = as.character(venn_CpG$UCSC_RefGene_Name) |
|
|
40 |
venn_CpG$UCSC_RefGene_Group = ifelse(venn_CpG$UCSC_RefGene_Group == "", "IGR",venn_CpG$UCSC_RefGene_Group) |
|
|
41 |
venn_CpG$Relation_to_UCSC_CpG_Island = ifelse(venn_CpG$Relation_to_UCSC_CpG_Island == "", "Open sea",venn_CpG$Relation_to_UCSC_CpG_Island) |
|
|
42 |
|
|
|
43 |
#UCSC_RefGene_Group |
|
|
44 |
paste0(venn_CpG$UCSC_RefGene_Group,collapse=",") |
|
|
45 |
A<-strsplit(as.character(venn_CpG$UCSC_RefGene_Group), ";") |
|
|
46 |
tmp2 <- mapply( cbind, A ) |
|
|
47 |
df0 <- ldply (tmp2, data.frame) |
|
|
48 |
prop.table(table(df0$X..i..)) |
|
|
49 |
#TSS200 IGR TSS1500 Body 1stExon 5'UTR |
|
|
50 |
#0.050 0.200 0.250 0.275 0.050 0.175 |
|
|
51 |
#Relation_to_UCSC_CpG_Island |
|
|
52 |
paste0(venn_CpG$Relation_to_UCSC_CpG_Island,collapse=",") |
|
|
53 |
A<-strsplit(as.character(venn_CpG$Relation_to_UCSC_CpG_Island), ";") |
|
|
54 |
tmp2 <- mapply( cbind, A ) |
|
|
55 |
df1 <- ldply (tmp2, data.frame) |
|
|
56 |
prop.table(table(df1$X..i..)) |
|
|
57 |
#Open sea N_Shelf Island S_Shore N_Shore S_Shelf |
|
|
58 |
#0.44 0.04 0.20 0.08 0.20 0.04 |
|
|
59 |
#all cpg--methylation |
|
|
60 |
methylation$UCSC_RefGene_Group = as.character(methylation$UCSC_RefGene_Group) |
|
|
61 |
methylation$Relation_to_UCSC_CpG_Island = as.character(methylation$Relation_to_UCSC_CpG_Island) |
|
|
62 |
methylation$UCSC_RefGene_Group = ifelse(methylation$UCSC_RefGene_Group == "", "IGR",methylation$UCSC_RefGene_Group) |
|
|
63 |
methylation$Relation_to_UCSC_CpG_Island = ifelse(methylation$Relation_to_UCSC_CpG_Island == "", "Open sea",methylation$Relation_to_UCSC_CpG_Island) |
|
|
64 |
|
|
|
65 |
A<-strsplit(as.character(methylation$UCSC_RefGene_Group), ";") |
|
|
66 |
tmp2 <- mapply( cbind, A ) |
|
|
67 |
df00 <- ldply (tmp2, data.frame) |
|
|
68 |
head(df00) |
|
|
69 |
prop.table(table(df00$X..i..)) |
|
|
70 |
#IGR Body 3'UTR TSS1500 5'UTR 1stExon TSS200 |
|
|
71 |
#0.13726348 0.35680909 0.03739231 0.15484313 0.13165007 0.06802518 0.11401675 |
|
|
72 |
A<-strsplit(as.character(methylation$Relation_to_UCSC_CpG_Island), ";") |
|
|
73 |
tmp2 <- mapply( cbind, A ) |
|
|
74 |
df11 <- ldply (tmp2, data.frame) |
|
|
75 |
head(df11) |
|
|
76 |
prop.table(table(df11$X..i..)) |
|
|
77 |
#Open sea S_Shore N_Shelf S_Shelf N_Shore Island |
|
|
78 |
#0.35281111 0.10314165 0.05029941 0.04472638 0.13191608 0.31710536 |
|
|
79 |
} |
|
|
80 |
|
|
|
81 |
#The 25 CpGs associated with HFrisk model |
|
|
82 |
{ |
|
|
83 |
library(ELMER) |
|
|
84 |
nearGenes <- GetNearGenes(data = mae, |
|
|
85 |
probes = id3[c(1,3:5,7:16,18:19,22:30)], |
|
|
86 |
numFlankingGenes = 20) |
|
|
87 |
} |
|
|
88 |
#enrichKEGG |
|
|
89 |
{ |
|
|
90 |
library(org.Hs.eg.db) |
|
|
91 |
library(clusterProfiler) |
|
|
92 |
library(plyr) |
|
|
93 |
library(ggplot2) |
|
|
94 |
library(forcats) |
|
|
95 |
library(enrichplot) |
|
|
96 |
library(ReactomePA) |
|
|
97 |
library(DOSE) |
|
|
98 |
library(org.Hs.eg.db) |
|
|
99 |
library(clusterProfiler) |
|
|
100 |
library(clusterProfiler) |
|
|
101 |
library(topGO) |
|
|
102 |
library(Rgraphviz) |
|
|
103 |
library(RDAVIDWebService) |
|
|
104 |
library(plyr) |
|
|
105 |
library(stringr) |
|
|
106 |
library(ggplot2) |
|
|
107 |
|
|
|
108 |
#enrichKEGG |
|
|
109 |
kegg_SYMBOL_hsa <- function(genes){ |
|
|
110 |
gene.df <- bitr(genes, fromType = "SYMBOL", |
|
|
111 |
toType = c("SYMBOL", "ENTREZID"), |
|
|
112 |
OrgDb = org.Hs.eg.db) |
|
|
113 |
head(gene.df) |
|
|
114 |
diff.kk <- enrichKEGG(gene = gene.df$ENTREZID, |
|
|
115 |
organism = 'hsa', |
|
|
116 |
pvalueCutoff = 0.99, |
|
|
117 |
qvalueCutoff = 0.99 |
|
|
118 |
) |
|
|
119 |
return( setReadable(diff.kk, OrgDb = org.Hs.eg.db,keyType = 'ENTREZID')) |
|
|
120 |
} |
|
|
121 |
paste0(venn_CpG$UCSC_RefGene_Name,collapse=",") |
|
|
122 |
A<-strsplit(as.character(venn_CpG$UCSC_RefGene_Name), ";") |
|
|
123 |
tmp2 <- mapply( cbind, A ) |
|
|
124 |
df0 <- ldply (tmp2, data.frame) |
|
|
125 |
trunk_kk=kegg_SYMBOL_hsa(df0[,1]) |
|
|
126 |
trunk_df=trunk_kk@result |
|
|
127 |
write.csv(trunk_df,file = 'enrichKEGG_result.csv') |
|
|
128 |
#other |
|
|
129 |
y <- mutate(trunk_df, richFactor = Count / as.numeric(sub("/\\d+", "", BgRatio))) |
|
|
130 |
parse_ratio <- function(ratio) { |
|
|
131 |
ratio <- sub("^\\s*", "", as.character(ratio)) |
|
|
132 |
ratio <- sub("\\s*$", "", ratio) |
|
|
133 |
numerator <- as.numeric(sub("/\\d+$", "", ratio)) |
|
|
134 |
denominator <- as.numeric(sub("^\\d+/", "", ratio)) |
|
|
135 |
return(numerator/denominator) |
|
|
136 |
} |
|
|
137 |
y2 <- mutate(y, FoldEnrichment = parse_ratio(GeneRatio) / parse_ratio(BgRatio)) |
|
|
138 |
y3 <- mutate(y2, geneRatio2 = parse_ratio(GeneRatio)) %>% |
|
|
139 |
arrange(desc(GeneRatio)) |
|
|
140 |
tmp <- y2[1:20,] |
|
|
141 |
ggplot(tmp, showCategory = 20,aes(FoldEnrichment, fct_reorder(Description, FoldEnrichment))) + |
|
|
142 |
geom_segment(aes(xend=0, yend = Description)) + |
|
|
143 |
geom_point(aes(color=pvalue, size = Count)) + |
|
|
144 |
scale_color_viridis_c(guide=guide_colorbar(reverse=TRUE)) + |
|
|
145 |
scale_size_continuous(range=c(2, 10)) + |
|
|
146 |
theme_minimal() + |
|
|
147 |
xlab("Fold Enrichment") + #Rich Factor |
|
|
148 |
ylab(NULL) + |
|
|
149 |
ggtitle("Enriched Pathway Ontology") |
|
|
150 |
write.csv(y2,file = 'enrichKEGG_result2.csv') |
|
|
151 |
write.csv(y3,file = 'enrichKEGG_result3.csv') |
|
|
152 |
} |
|
|
153 |
#enrichGO |
|
|
154 |
{ |
|
|
155 |
bit_gene <- function(genes){ |
|
|
156 |
gene.df <- bitr(genes, fromType = "SYMBOL", |
|
|
157 |
toType = c("SYMBOL", "ENTREZID"), |
|
|
158 |
OrgDb = org.Hs.eg.db) |
|
|
159 |
return(gene.df$ENTREZID) |
|
|
160 |
} |
|
|
161 |
bp <- enrichGO(bit_gene( as.character(df0[,1])), ont="all",OrgDb = "org.Hs.eg.db") |
|
|
162 |
bpp = bp@result |
|
|
163 |
bp3 <- setReadable(bp, OrgDb = org.Hs.eg.db) |
|
|
164 |
barplot(bp, split="ONTOLOGY")+ facet_grid(ONTOLOGY~., scale="free") |
|
|
165 |
write.table(bp@result,"enrichGO_result.csv") |
|
|
166 |
bp = as.data.frame(bp) |
|
|
167 |
enrichMap(bp) |
|
|
168 |
goplot(bp, showCategory = 10) |
|
|
169 |
bp2 <-simplify(bp, cutoff=0.7,by="p.adjust", select_fun=min) |
|
|
170 |
as.data.frame(bp2) |
|
|
171 |
write.csv(bp2,file = 'enrichGO_simplify.csv') |
|
|
172 |
} |
|
|
173 |
#gometh: kegg and go;gsameth |
|
|
174 |
{ |
|
|
175 |
library(limma) |
|
|
176 |
library(minfi) |
|
|
177 |
library(IlluminaHumanMethylation450kanno.ilmn12.hg19) |
|
|
178 |
library(IlluminaHumanMethylation450kmanifest) |
|
|
179 |
library(RColorBrewer) |
|
|
180 |
library(missMethyl) |
|
|
181 |
library(matrixStats) |
|
|
182 |
library(minfiData) |
|
|
183 |
|
|
|
184 |
setwd("E:\\workplace\\mywork\\methy\\dbgap\\chf\\data_chf_contr\\early_chf\\c1_UMN_JHU\\train_UMN_tset_JHU/1123_dataSummary") |
|
|
185 |
id3 <- read.table("xgblasso_DMP.csv",sep=",",header = T) |
|
|
186 |
head(id3) |
|
|
187 |
id3 <- as.character(id3$Feature) |
|
|
188 |
|
|
|
189 |
# Get all the CpG sites used in the analysis to form the background |
|
|
190 |
load(file= "new_champ/CorrectedBeta.Rdata") |
|
|
191 |
all <-rownames(CorrectedBeta) |
|
|
192 |
# Total number of CpG sites tested |
|
|
193 |
length(all) |
|
|
194 |
#methylGSA |
|
|
195 |
{ |
|
|
196 |
library(methylGSA) |
|
|
197 |
library(IlluminaHumanMethylation450kanno.ilmn12.hg19) |
|
|
198 |
library(IlluminaHumanMethylationEPICanno.ilm10b4.hg19) |
|
|
199 |
|
|
|
200 |
DEG_filter = read.csv("new_champ/myDMP.txt",sep="\t") |
|
|
201 |
DEG_filter = rownames_to_column(DEG_filter,"probe.id")#23581 |
|
|
202 |
cpg.pval <- as.data.frame(DEG_filter[DEG_filter$probe.id %in% c(id3[-c(2,6,17,20,21)]),c("probe.id","P.Value")])#514 |
|
|
203 |
c = paste(DEG_filter[DEG_filter$probe.id %in% c(id3[-c(2,6,17,20,21)]),"probe.id"],DEG_filter[DEG_filter$probe.id %in% c(id3[-c(2,6,17,20,21)]),"P.Value"],sep = "=",collapse = ",") |
|
|
204 |
cpg.pval <- c(cg20051875=3.0845273397844e-09,cg24205914=9.48317106472548e-07,cg10083824=1.23286930624815e-06,cg03556243=4.95956762541123e-06,cg03233656=1.032434350337e-05,cg21429551=1.12835004011784e-05,cg16781992=1.33529811893581e-05,cg27401945=1.34687254097115e-05,cg08614290=3.97354136093555e-05,cg05363438=4.55923591304913e-05,cg08101977=5.07348656655275e-05,cg10556349=5.36252008917427e-05,cg13352914=9.88886388394135e-05,cg11853697=0.000116194512198325,cg00045910=0.000133863261077109,cg00495303=0.000141563110724823,cg00522231=0.000155960771094521,cg06344265=0.000258547818107834,cg21024264=0.000272396328465786,cg05481257=0.000326204702802846,cg23299445=0.000569220592642687,cg05845376=0.000637513022532287,cg17766026=0.000662632200376124,cg25755428=0.00083234067533089,cg07041999=0.000990852821362967) |
|
|
205 |
head(cpg.pval) |
|
|
206 |
library(org.Hs.eg.db) |
|
|
207 |
res1 = methylglm(cpg.pval = cpg.pval, #Names should be CpG IDs. |
|
|
208 |
minsize = 200, |
|
|
209 |
maxsize = 500, |
|
|
210 |
GS.type = "KEGG")#"GO", "KEGG", or "Reactome" |
|
|
211 |
#2 gene sets |
|
|
212 |
res6 = methylglm(cpg.pval = cpg.pval, |
|
|
213 |
GS.type = "Reactome", |
|
|
214 |
minsize = 100, |
|
|
215 |
maxsize = 500) |
|
|
216 |
#0 gene sets |
|
|
217 |
res7 = methylglm(cpg.pval = cpg.pval, |
|
|
218 |
GS.type = "GO", |
|
|
219 |
minsize = 100, |
|
|
220 |
maxsize = 500) |
|
|
221 |
#449 gene sets |
|
|
222 |
res2 = methylRRA(cpg.pval = cpg.pval, |
|
|
223 |
method = "ORA", |
|
|
224 |
minsize = 200, |
|
|
225 |
maxsize = 500) |
|
|
226 |
#259 gene sets |
|
|
227 |
res3 = methylRRA(cpg.pval = cpg.pval, |
|
|
228 |
method = "GSEA", |
|
|
229 |
minsize = 200, |
|
|
230 |
maxsize = 500) |
|
|
231 |
#259 gene sets |
|
|
232 |
res4 = methylgometh(cpg.pval = cpg.pval, |
|
|
233 |
sig.cut = 0.05, |
|
|
234 |
minsize = 200, |
|
|
235 |
maxsize = 500) |
|
|
236 |
data(CpG2Genetoy) |
|
|
237 |
head(CpG2Gene) |
|
|
238 |
FullAnnot = prepareAnnot(CpG2Gene) |
|
|
239 |
data(GSlisttoy) |
|
|
240 |
GS.list = GS.list[1:10] |
|
|
241 |
res5 = methylRRA(cpg.pval = cpg.pval, |
|
|
242 |
FullAnnot = FullAnnot, |
|
|
243 |
method = "ORA", |
|
|
244 |
GS.list = GS.list, |
|
|
245 |
GS.idtype = "SYMBOL", |
|
|
246 |
minsize = 100, |
|
|
247 |
maxsize = 300) |
|
|
248 |
#10 gene sets |
|
|
249 |
} |
|
|
250 |
#par(mfrow=c(1,1)) |
|
|
251 |
gst <- gometh(sig.cpg = id3[-c(2,6,17,20,21)], all.cpg=all, collection = "GO",plot.bias=TRUE) |
|
|
252 |
GO_result = topGO(gst) |
|
|
253 |
topGSA(gst, n=10) |
|
|
254 |
write.table(GO_result,"gometh_toGO.csv",sep=",") |
|
|
255 |
kegg <- gometh(sig.cpg = id3[-c(2,6,17,20,21)], all.cpg = all, collection = "KEGG", prior.prob=TRUE) |
|
|
256 |
#kegg <- gometh(sig.cpg = set, all.cpg = all, collection = "KEGG", prior.prob=TRUE) |
|
|
257 |
KEGG_result = topKEGG(kegg) |
|
|
258 |
topGSA(kegg, n=10) |
|
|
259 |
#gst.kegg.prom <- gometh(sig.cpg=id3[-c(2,6,17,20,21)], all.cpg=all, collection="KEGG",genomic.features = c("TSS200","TSS1500","1stExon")) |
|
|
260 |
#topGSA(gst.kegg.prom, n=10) |
|
|
261 |
#gst.kegg.body <- gometh(sig.cpg=id3[-c(2,6,17,20,21)], all.cpg=all, collection="KEGG", genomic.features = c("Body")) |
|
|
262 |
#topGSA(gst.kegg.body, n=10) |
|
|
263 |
write.table(KEGG_result,"gometh_toKEGG.csv",sep=",") |
|
|
264 |
|
|
|
265 |
hallmark <- readRDS(url("http://bioinf.wehi.edu.au/MSigDB/v7.1/Hs.h.all.v7.1.entrez.rds")) |
|
|
266 |
gsa <- gsameth(sig.cpg=id3[-c(2,6,17,20,21)], all.cpg=all, collection=hallmark) |
|
|
267 |
GSA_result = topGSA(gsa) |
|
|
268 |
write.table(GSA_result,"gsameth_topGSA.csv",sep=",") |
|
|
269 |
|
|
|
270 |
} |
|
|
271 |
#3.enrichPathway |
|
|
272 |
{ |
|
|
273 |
x <- enrichPathway(gene = bit_gene(df0[,1]) ,pvalueCutoff=0.95, readable=T ) |
|
|
274 |
X = as.data.frame(x) |
|
|
275 |
parse_ratio <- function(ratio) { |
|
|
276 |
ratio <- sub("^\\s*", "", as.character(ratio)) |
|
|
277 |
ratio <- sub("\\s*$", "", ratio) |
|
|
278 |
numerator <- as.numeric(sub("/\\d+$", "", ratio)) |
|
|
279 |
denominator <- as.numeric(sub("^\\d+/", "", ratio)) |
|
|
280 |
return(numerator/denominator) |
|
|
281 |
} |
|
|
282 |
|
|
|
283 |
y <- mutate(X, richFactor = Count / as.numeric(sub("/\\d+", "", BgRatio))) |
|
|
284 |
y3 <- mutate(y, geneRatio1 = parse_ratio(GeneRatio)) |
|
|
285 |
y4 <- mutate(y3, geneRatio2 = parse_ratio(GeneRatio) / parse_ratio(BgRatio)) |
|
|
286 |
library(ggplot2) |
|
|
287 |
library(forcats) |
|
|
288 |
library(enrichplot) |
|
|
289 |
tmp <- y4[1:20,] |
|
|
290 |
ggplot(tmp, showCategory = 20,aes(geneRatio2, fct_reorder(Description, geneRatio2))) + |
|
|
291 |
geom_segment(aes(xend=0, yend = Description)) + |
|
|
292 |
geom_point(aes(color=pvalue, size = Count)) + |
|
|
293 |
scale_color_viridis_c(guide=guide_colorbar(reverse=TRUE)) + |
|
|
294 |
scale_size_continuous(range=c(2, 10)) + |
|
|
295 |
theme_minimal() + |
|
|
296 |
xlab("Fold Enrichment") + |
|
|
297 |
ylab(NULL) + |
|
|
298 |
ggtitle("Enriched Pathway Ontology") |
|
|
299 |
tmp$log_pvalue <- -log10(tmp$pvalue) |
|
|
300 |
ggplot(tmp, showCategory = 20,aes(log_pvalue, fct_reorder(Description, log_pvalue))) + |
|
|
301 |
geom_segment(aes(xend=0, yend = Description)) + |
|
|
302 |
geom_point(aes(size = Count)) + |
|
|
303 |
scale_color_viridis_c(guide=guide_colorbar(reverse=TRUE)) + |
|
|
304 |
scale_size_continuous(range=c(2, 10)) + |
|
|
305 |
theme_minimal() + |
|
|
306 |
xlab("Fold Enrichment") + |
|
|
307 |
ylab(NULL) + |
|
|
308 |
ggtitle("Enriched Pathway Ontology") |
|
|
309 |
dotplot(x , color = "pvalue",showCategory = 20, font.size = 12, title = "") |
|
|
310 |
barplot(x, showCategory=10) |
|
|
311 |
emapplot(x,showCategory=20,color = "pvalue") |
|
|
312 |
heatplot(x,showCategory=20) |
|
|
313 |
#heatplot(x, foldChange=geneList) |
|
|
314 |
cnetplot(x, categorySize="pvalue")#foldChange=geneList |
|
|
315 |
upsetplot(x) |
|
|
316 |
write.csv(y4,file = 'enrichPathway.csv') |
|
|
317 |
} |
|
|
318 |
#4.enrichDO |
|
|
319 |
{ |
|
|
320 |
x <- enrichDO(gene = bit_gene( df0[,1]), |
|
|
321 |
ont = "DO", |
|
|
322 |
pvalueCutoff = 0.5, |
|
|
323 |
pAdjustMethod = "BH", |
|
|
324 |
universe = x@universe, |
|
|
325 |
minGSSize = 5, |
|
|
326 |
maxGSSize = 500, |
|
|
327 |
qvalueCutoff = 0.5, |
|
|
328 |
readable = FALSE) |
|
|
329 |
x@result |
|
|
330 |
write.csv(x@result,file = 'enrichDO.csv') |
|
|
331 |
|
|
|
332 |
#plot |
|
|
333 |
emapplot(x) |
|
|
334 |
x <- setReadable(x, 'org.Hs.eg.db') |
|
|
335 |
head(x) |
|
|
336 |
cnetplot(x) |
|
|
337 |
write.csv(x,file = 'enrichDO2.csv') |
|
|
338 |
} |
|
|
339 |
#5.enrichNCG |
|
|
340 |
{ |
|
|
341 |
ncg <- enrichNCG(bit_gene(df0[,1]),pvalueCutoff = 0.95) |
|
|
342 |
head(ncg,10) |
|
|
343 |
dotplot(ncg, showCategory=30) + ggtitle("dotplot for GSEA") |
|
|
344 |
ridgeplot(ncg) |
|
|
345 |
gseaplot2(ncg, geneSetID = 1, title = ncg$Description[1]) |
|
|
346 |
gseaplot2(ncg, geneSetID = 1:3) |
|
|
347 |
gseaplot2(ncg, geneSetID = 1:3, pvalue_table = TRUE,color = c("#E495A5", "#86B875", "#7DB0DD"), ES_geom = "dot") |
|
|
348 |
write.csv(ncg,file = 'enrichNCG.csv') |
|
|
349 |
} |
|
|
350 |
#6. enrichDGN |
|
|
351 |
{ |
|
|
352 |
dgn <- enrichDGN(bit_gene(df0[,1]),pvalueCutoff = 0.55) |
|
|
353 |
head(dgn,20) |
|
|
354 |
dgn <- data.frame(dgn) |
|
|
355 |
y <- mutate(dgn, richFactor = Count / as.numeric(sub("/\\d+", "", BgRatio))) |
|
|
356 |
library(ggplot2) |
|
|
357 |
library(forcats) |
|
|
358 |
library(enrichplot) |
|
|
359 |
tmp <- y[1:20,] |
|
|
360 |
ggplot(tmp, showCategory = 20,aes(richFactor, fct_reorder(Description, richFactor))) + |
|
|
361 |
geom_segment(aes(xend=0, yend = Description)) + |
|
|
362 |
geom_point(aes(color=pvalue, size = Count)) + |
|
|
363 |
scale_color_viridis_c(guide=guide_colorbar(reverse=TRUE)) + |
|
|
364 |
scale_size_continuous(range=c(2, 10)) + |
|
|
365 |
theme_minimal() + |
|
|
366 |
xlab("rich factor") + |
|
|
367 |
ylab(NULL) + |
|
|
368 |
ggtitle("Enriched Pathway Ontology") |
|
|
369 |
write.csv(y,file = 'enrichDGN.csv') |
|
|
370 |
edox <- setReadable(dgn, 'org.Hs.eg.db', 'ENTREZID') |
|
|
371 |
cnetplot(edox) |
|
|
372 |
emapplot(dgn) |
|
|
373 |
} |
|
|
374 |
#7.enricher |
|
|
375 |
{ |
|
|
376 |
library(magrittr) |
|
|
377 |
library(clusterProfiler) |
|
|
378 |
|
|
|
379 |
#data(geneList, package="DOSE") |
|
|
380 |
#gene <- names(geneList)[abs(geneList) > 2] |
|
|
381 |
wpgmtfile <- system.file("extdata/wikipathways-20180810-gmt-Homo_sapiens.gmt", package="clusterProfiler") |
|
|
382 |
#wpgmtfile <- system.file("extdata/c5.cc.v5.0.entrez.gmt", package="clusterProfiler") |
|
|
383 |
wp2gene <- read.gmt(wpgmtfile) |
|
|
384 |
head(wp2gene) |
|
|
385 |
wp2gene <- wp2gene %>% tidyr::separate(ont, c("name","version","wpid","org"), "%") |
|
|
386 |
wpid2gene <- wp2gene %>% dplyr::select(wpid, gene) #TERM2GENE |
|
|
387 |
wpid2name <- wp2gene %>% dplyr::select(wpid, name) #TERM2NAME |
|
|
388 |
ewp <- enricher(bit_gene(df0[,1]), TERM2GENE = wpid2gene, TERM2NAME = wpid2name) |
|
|
389 |
head(ewp) |
|
|
390 |
write.csv(ewp,file = 'enricher_gmt_Homo_sapiens.csv') |
|
|
391 |
#====== |
|
|
392 |
gmtfile <- system.file("extdata", "c5.cc.v5.0.entrez.gmt", package="clusterProfiler") |
|
|
393 |
c5 <- read.gmt(gmtfile) |
|
|
394 |
egmt <- enricher(bit_gene(df0[,1]), TERM2GENE=c5) |
|
|
395 |
head(egmt) |
|
|
396 |
} |
|
|
397 |
#8.msigdbr:enricher |
|
|
398 |
{ |
|
|
399 |
library(msigdbr) |
|
|
400 |
|
|
|
401 |
m_df <- msigdbr(species = "Homo sapiens") |
|
|
402 |
head(m_df, 2) %>% as.data.frame |
|
|
403 |
m_t2g <- msigdbr(species = "Homo sapiens", category = "C3") %>% |
|
|
404 |
dplyr::select(gs_name, entrez_gene) |
|
|
405 |
head(m_t2g) |
|
|
406 |
em <- enricher(bit_gene(df0[,1]), TERM2GENE=m_t2g) |
|
|
407 |
head(em) |
|
|
408 |
} |
|
|
409 |
#9.groupGO |
|
|
410 |
{ |
|
|
411 |
ggo <- groupGO(gene = bit_gene(df0[,1]), |
|
|
412 |
OrgDb = org.Hs.eg.db, |
|
|
413 |
ont = "BP", |
|
|
414 |
level = 3, |
|
|
415 |
readable = TRUE) |
|
|
416 |
|
|
|
417 |
ggo2 <- groupGO(gene = bit_gene(df0[,1]), |
|
|
418 |
OrgDb = org.Hs.eg.db, |
|
|
419 |
ont = "CC", |
|
|
420 |
level = 3, |
|
|
421 |
readable = TRUE) |
|
|
422 |
ggo3 <- groupGO(gene = bit_gene(df0[,1]), |
|
|
423 |
OrgDb = org.Hs.eg.db, |
|
|
424 |
ont = "MF", |
|
|
425 |
level = 3, |
|
|
426 |
readable = TRUE) |
|
|
427 |
|
|
|
428 |
head(ggo) |
|
|
429 |
write.csv(ggo,file = 'groupGO_BP.csv') |
|
|
430 |
write.csv(ggo2,file = 'groupGO_CC.csv') |
|
|
431 |
write.csv(ggo3,file = 'groupGO_MF.csv') |
|
|
432 |
} |
|
|
433 |
#10.search:KEGG |
|
|
434 |
{ |
|
|
435 |
library(clusterProfiler) |
|
|
436 |
search_kegg_organism('Collagen', by='kegg_code') |
|
|
437 |
ecoli <- search_kegg_organism('ece', by='scientific_name') |
|
|
438 |
dim(ecoli) |
|
|
439 |
} |
|
|
440 |
#11.enrichMKEGG |
|
|
441 |
{ |
|
|
442 |
mkk <- enrichMKEGG(gene = bit_gene(df0[,1]),organism = 'hsa') |
|
|
443 |
} |
|
|
444 |
#12.enrichMeSH |
|
|
445 |
{ |
|
|
446 |
library(meshes) |
|
|
447 |
x <- enrichMeSH(bit_gene(df0[,1]), MeSHDb = "MeSH.Hsa.eg.db", database='gendoo', category = 'C') |
|
|
448 |
head(x) |
|
|
449 |
} |