|
a |
|
b/.Rhistory |
|
|
1 |
lmodel = lm(allResults[,i] ~ .^2, data = allPredictors) |
|
|
2 |
sink(paste0(outputLoc,"_int_",method,"txt")) |
|
|
3 |
print(summary(lmodel)) |
|
|
4 |
sink() # returns output to the console |
|
|
5 |
} |
|
|
# Go through each method but only look at the 1000-SNP runs.
only1000SNPs <- which(allPredictors$X.causals == "1000")
for (i in seq_along(modelNames)) {  # seq_along: safe when modelNames is empty
  method <- modelNames[i]
  # Perform the regression analysis for this method, restricted to 1000-SNP runs.
  lmodel <- lm(allResults[only1000SNPs, i] ~ p_current + shared_corr,
               data = allPredictors[only1000SNPs, ])
  # BUG FIX: the original paste0() was missing the "." before "txt",
  # producing files named "..._1000_<method>txt".
  sink(paste0(outputLoc, "_1000_", method, ".txt"))
  print(summary(lmodel))
  sink()  # returns output to the console
}
|
|
# Perform the regression analyses for each method over ALL runs.
for (i in seq_along(modelNames)) {  # seq_along: safe when modelNames is empty
  method <- modelNames[i]
  lmodel <- lm(allResults[, i] ~ p_current + shared_corr, data = allPredictors)
  # BUG FIX: the original paste0() was missing the "." before "txt",
  # producing files named "..._all_<method>txt".
  sink(paste0(outputLoc, "_all_", method, ".txt"))
  print(summary(lmodel))
  sink()  # returns output to the console
}
|
|
# Recode the sample-size labels into actual N, then coerce to numeric.
sampleSizeAsN <- allPredictors$sample_size
sampleSizeAsN[which(sampleSizeAsN == "_half")] <- 8000
sampleSizeAsN[which(sampleSizeAsN == "_full")] <- 16000
sampleSizeAsN[which(sampleSizeAsN == "_double")] <- 32000
sampleSizeAsN <- as.numeric(sampleSizeAsN)

# Overall results: central tendency of prediction accuracy per method.
median(allResults$meta)     # 0.05962532
median(allResults$subpheno) # 0.07367951
median(allResults$shaPRS)   # 0.1139283
median(allResults$SMTPred)  # 0.07585284
mean(allResults$meta)       # 0.07426966
mean(allResults$subpheno)   # 0.09124099
mean(allResults$shaPRS)     # 0.1189231
mean(allResults$SMTPred)    # 0.09280919

# Combined (meta) seems a bit low; check where it beats subpheno,
# and where SMTPred beats shaPRS.
combined_better_than_subpheno_indices <- which(allResults$meta > allResults$subpheno)
combined_better_than_subpheno <- allResults[combined_better_than_subpheno_indices, ]
SMTPred_better_than_shapRS_indices <- which(allResults$SMTPred > allResults$shaPRS)
SMTPred_better_than_shapRS <- allResults[SMTPred_better_than_shapRS_indices, ]
|
|
# Create heatmaps: build one annotation label per simulation scenario.
# Columns of allPredictors referenced by position below:
# 2 = rG, 3 = p, 4 = correlation, 5 = scenario tag, 6 = split, 7 = sample size
# (presumably -- TODO confirm column layout against where allPredictors is built).
i <- 1
coln <- c()
rGs <- c()
ps <- c()
cors <- c()
splits <- c()
sample_sizes <- c()
# BUG FIX: the original looped 1:length(allPredictors[,1]), which
# wrongly iterates c(1, 0) on an empty table; seq_len(nrow()) is safe.
for (i in seq_len(nrow(allPredictors))) {
  print(i)
  sample_size <- gsub("_", "", allPredictors[i, 7])  # remove underscores
  rG <- padTo_dec(allPredictors[i, 2], 4)
  p <- padTo_dec(round(allPredictors[i, 3], 2), 4)
  corre <- padTo_dec(round(allPredictors[i, 4], 2), 4)
  annotations <- paste0("rG:", rG, " p:", p, " cor:", corre, " ",
                        allPredictors[i, 5], " split:", allPredictors[i, 6],
                        " size:", sample_size)
  coln <- c(coln, annotations)
  splits <- c(splits, allPredictors[i, 6])
  ps <- c(ps, p)
  cors <- c(cors, corre)
  sample_sizes <- c(sample_sizes, sample_size)
  # Extra trailing space for padding, otherwise the pheatmap legend is cut off.
  rGs <- c(rGs, paste0(rG, " "))
}
rownames(allResults) <- coln

# Sort the data rows by rG, keeping all annotation vectors in sync.
orderByRG <- order(as.numeric(rGs))
allResults <- allResults[orderByRG, ]
splits <- splits[orderByRG]
rGs <- rGs[orderByRG]
ps <- ps[orderByRG]
cors <- cors[orderByRG]
sample_sizes <- sample_sizes[orderByRG]

# Annotation frame: rownames must match the data for pheatmap's annotation_row.
rGs_DF <- data.frame(rGs, as.numeric(ps), as.numeric(cors), sample_sizes, splits,
                     row.names = rownames(allResults))
colnames(rGs_DF) <- c("rG", "p", "cor", "N", "split")  # header for the annotation
|
|
# Function to separate regular/extra results:
# keeps only the rows of allResults whose rowname contains `filterTerm`,
# strips the term from the kept rownames, and subsets the parallel
# annotation vectors (splits/ps/cors) to the same rows.
#
# NOTE(review): the original history recorded a broken attempt here -- the
# first `function(` was never closed, a second definition was nested inside
# it, and a stray ")" followed; that syntax-error garbage is replaced with
# a single clean definition.
filterOutByTerm_all = function(allResults, splits, ps, cors, filterTerm = "rG:0.50") {
  keep = grepl(filterTerm, rownames(allResults), fixed = TRUE)
  subsetResults = allResults[keep, , drop = FALSE]
  # fixed = TRUE: the default term contains ".", which is a regex metachar
  rownames(subsetResults) = gsub(filterTerm, "", rownames(subsetResults), fixed = TRUE)
  results = list(
    subsetResults = subsetResults,
    ps_subset     = ps[keep],
    cors_subset   = cors[keep],
    splits_subset = splits[keep]
  )
  return(results)
}
|
|
# Function to separate regular/extra results:
# keeps only the rows of allResults whose rowname contains `filterTerm`,
# strips the term from the kept rownames, and subsets the parallel
# annotation vectors (splits/ps/cors) to the same rows.
#
# Improvements over the original:
# - vectorized grepl over rownames instead of a per-row loop that grew
#   indices_kept with c() (O(n^2))
# - gsub now uses fixed = TRUE, matching the grepl call; the default term
#   "rG:0.50" contains ".", which the original's regex gsub treated as
#   "any character"
# - the result is built as a list() directly instead of NULL + $ assignment
# - drop = FALSE guards against dimension dropping on single-row results
filterOutByTerm_all = function(allResults, splits, ps, cors, filterTerm = "rG:0.50") {
  keep = grepl(filterTerm, rownames(allResults), fixed = TRUE)
  subsetResults = allResults[keep, , drop = FALSE]
  rownames(subsetResults) = gsub(filterTerm, "", rownames(subsetResults), fixed = TRUE)
  results = list(
    subsetResults = subsetResults,
    ps_subset     = ps[keep],
    cors_subset   = cors[keep],
    splits_subset = splits[keep]
  )
  return(results)
}
|
|
# Filter to keep the main interesting scenarios: rG 0.5, regular, full.
results_RG05 = filterOutByTerm_all(allResults, splits, ps, cors, filterTerm = "rG:0.50")
results_regular = filterOutByTerm_all(results_RG05$subsetResults, results_RG05$splits_subset, results_RG05$ps_subset, results_RG05$cors_subset, filterTerm = "regular")
results_full = filterOutByTerm_all(results_regular$subsetResults, results_regular$splits_subset, results_regular$ps_subset, results_regular$cors_subset, filterTerm = "size:full")
subsetResults = results_full$subsetResults
# Rownames must match the data for the pheatmap annotation to work.
subset_DF <- data.frame(results_full$ps_subset, results_full$cors_subset, results_full$splits_subset, row.names = rownames(subsetResults))
colnames(subset_DF) <- c("p", "cor", "split")  # header for the annotation
#plotName = "shaPRS - rG:0.5, n:full, no extra" # no plotname for final publication
plotName = ""
pheatmap(subsetResults, main = plotName, filename = paste(outputLoc, "_subset.png", sep = ""), annotation_row = subset_DF, show_rownames = F, height = 5, width = 5, cex = 1, cluster_rows = F, cluster_cols = F)

# Filter to keep the "extra" scenarios: rG 0.5, extra, full.
# (The original history re-ran the regular/full filtering a second time with
# no plot in between; that dead duplicate is dropped here.)
results_RG05 = filterOutByTerm_all(allResults, splits, ps, cors, filterTerm = "rG:0.50")
results_regular = filterOutByTerm_all(results_RG05$subsetResults, results_RG05$splits_subset, results_RG05$ps_subset, results_RG05$cors_subset, filterTerm = "extra")
results_full = filterOutByTerm_all(results_regular$subsetResults, results_regular$splits_subset, results_regular$ps_subset, results_regular$cors_subset, filterTerm = "size:full")
subsetResults_extra = results_full$subsetResults
subset_DF_extra <- data.frame(results_full$ps_subset, results_full$cors_subset, results_full$splits_subset, row.names = rownames(subsetResults_extra))
colnames(subset_DF_extra) <- c("p", "cor", "split")  # header for the annotation

# Put both heatmaps on the same colour scale.
# BUG FIX: the first attempt in the history used
# min(c(subsetResults, subsetResults_extra)), which errors on data.frames
# because c() produces a list; min()/max() accept multiple arguments directly.
Breaks <- seq(min(subsetResults, subsetResults_extra), max(subsetResults, subsetResults_extra), length = 100)
Breaks
paste(outputLoc, "_subset.png", sep = "")
pheatmap(subsetResults, breaks = Breaks, main = plotName, filename = paste(outputLoc, "_subset.png", sep = ""), annotation_row = subset_DF, show_rownames = F, height = 5, width = 5, cex = 1, cluster_rows = F, cluster_cols = F)
pheatmap(subsetResults_extra, breaks = Breaks, main = plotName, filename = paste(outputLoc, "_subset_extra.png", sep = ""), annotation_row = subset_DF_extra, show_rownames = F, height = 5, width = 5, cex = 1, cluster_rows = F, cluster_cols = F)
|
|
# Debug args: simulate the command-line arguments of a cluster run.
# Layout repeats in groups of four: <baseLoc> <p> <shared_corr> are the
# last three of each group; the loop below starts at index 4.
args = vector()
args = c(args, "#causals_1000_rG_0.1_A50_B50_size_half")
args = c(args, "0.1")
args = c(args, "C:/softwares/Cluster/0shaPRS/debug/#causals_1000_rG_0.1_A50_B50_size_half")
args = c(args, "C:/softwares/Cluster/0shaPRS/debug/1000/10/0.1_1.0/A50_B50/size_half/")
args = c(args, "0.1")
args = c(args, "1.0")
args = c(args, "C:/softwares/Cluster/0shaPRS/debug/1000/10/0.55_0.1818182/A50_B50/size_half/")
args = c(args, "0.55")
args = c(args, "0.1818182")
args = c(args, "C:/softwares/Cluster/0shaPRS/debug/1000/10/1.0_0.1/A50_B50/size_half/")
args = c(args, "1.0")
args = c(args, "0.1")
limitsEnabled = FALSE

# Load the input files for each method, one scenario per iteration.
combined = NULL
subpheno = NULL
shaPRS = NULL
SMTPred = NULL
xlabels = vector()
for (i in seq(from = 4, to = length(args), by = 4)) {  # 4 as we also add the 'regular'
  baseLoc = args[i]
  print(paste0("baseLoc is: ", baseLoc))
  current_combined = read.table(paste0(baseLoc, "combined"), header = FALSE)
  current_subpheno = read.table(paste0(baseLoc, "subpheno"), header = FALSE)
  current_shaPRS = read.table(paste0(baseLoc, "shaPRS_meta"), header = FALSE)
  current_SMTPred = read.table(paste0(baseLoc, "SMTPred"), header = FALSE)
  # Replace NAs with the column mean so downstream stats don't drop runs.
  current_combined[is.na(current_combined[, 1]), 1] <- mean(current_combined[, 1], na.rm = TRUE)
  current_subpheno[is.na(current_subpheno[, 1]), 1] <- mean(current_subpheno[, 1], na.rm = TRUE)
  current_shaPRS[is.na(current_shaPRS[, 1]), 1] <- mean(current_shaPRS[, 1], na.rm = TRUE)
  current_SMTPred[is.na(current_SMTPred[, 1]), 1] <- mean(current_SMTPred[, 1], na.rm = TRUE)
  # First scenario initialises; later ones are appended column-wise.
  if (is.null(combined)) {
    combined = current_combined
    subpheno = current_subpheno
    shaPRS = current_shaPRS
    SMTPred = current_SMTPred
  } else {
    combined = cbind(combined, current_combined)
    subpheno = cbind(subpheno, current_subpheno)
    shaPRS = cbind(shaPRS, current_shaPRS)
    SMTPred = cbind(SMTPred, current_SMTPred)
  }
  p_current = round(as.numeric(args[(i + 1)]), 2)
  shared_corr = round(as.numeric(args[(i + 2)]), 2)
  print(paste0("p_current: ", p_current, " / shared_corr: ", shared_corr, " | baseLoc: ", baseLoc))
  xlabels = c(xlabels, paste0("p:", p_current, "/r:", shared_corr))
}
|
|
?plot
library(qvalue)
?qvalue_truncp
?qvalue::qvalue_truncp
?qvalue
inputDataLoc = "C:/0Datasets/shaPRS/DEL/EUR_JAP_T2D_SE_meta"
blendFactorsLoc = "C:/0Datasets/shaPRS/DEL/EUR_JAP_T2D_lFDR_meta_SNP_lFDR"
outputLoc = "C:/0Datasets/shaPRS/DEL/QvalManhattan"
B12Loc = "C:/0Datasets/shaPRS/DEL/EUR_JAP_T2D_sumstats_meta"
plotTitle = "Uga"
# 1. load data
inputData = read.table(inputDataLoc, header = TRUE)
blendFactors = read.table(blendFactorsLoc, header = TRUE)
B12 = read.table(B12Loc, header = TRUE)
# Merge by SNP to make sure the tables are aligned.
inputData_blendFactors = merge(inputData, blendFactors, by = "SNP")
B12_blendFactors = merge(B12, blendFactors, by = "SNP")

# MANHATTAN PLOT
# Expose the adjusted Q values as 'P', as that is the column plotted next.
inputData_blendFactors$P = inputData_blendFactors$Qval
lfdr_2_blending = 1 - inputData_blendFactors$lFDR
base_colour1 = 0.20
base_colour2 = 0.20
manhattanBaseColours = c(rgb(0, base_colour2, 0, 1), rgb(0, 0, base_colour1, 1))
allIndices = inputData_blendFactors$BP

# Offset each SNP's BP by the cumulative length of all previous chromosomes.
inputData_blendFactors$Offsets = 0
for (i in 1:21) {  # we always set the NEXT offset, so we don't loop to the last chromosome
  message(i)
  CHR_SNPs = inputData_blendFactors[inputData_blendFactors$CHR == i, ]
  maxBPCurrentChrom = max(CHR_SNPs$BP)
  currentOffset = CHR_SNPs$Offsets[1]
  nextOffset = currentOffset + maxBPCurrentChrom
  # BUG FIX: write via the column NAME; the original hard-coded column
  # index 9, which silently corrupts a different column if the merge
  # ever changes the column layout.
  inputData_blendFactors$Offsets[inputData_blendFactors$CHR == (i + 1)] = nextOffset
}

hist(B12_blendFactors$lFDR, probability = TRUE, col = "red", xlab = "lFDR", main = "")
plot(B12_blendFactors$lFDR, B12_blendFactors$b, col = "red", xlab = "lFDR", ylab = "SNP coef", main = "")
# Get the distribution with alpha-blended points (first 5000 SNPs only).
library(scales)
plot(B12_blendFactors$lFDR[1:5000], B12_blendFactors$b[1:5000], col = alpha("red", 0.4), xlab = "lFDR", ylab = "SNP coef", main = "")
plot(B12_blendFactors$lFDR[1:5000], B12_blendFactors$b[1:5000], col = alpha("red", 0.3), xlab = "lFDR", ylab = "SNP coef", main = "")
|
|
# FIS phenotype: compare male vs female distributions.
baseLoc = "C:/0Datasets/ukbb/fis/"
phenoLoc = paste0(baseLoc, "pheno")
covarsLoc = paste0(baseLoc, "covariates")
pheno = read.table(phenoLoc, header = FALSE)
View(pheno)
covars = read.table(covarsLoc, header = TRUE)
View(covars)
# BUG FIX: the original history subset the phenotype before female_index
# existed, and used pheno[idx] -- which selects COLUMNS of a data.frame,
# not rows; only the corrected form (pheno$V1[idx]) is kept, with the
# indices defined first.
men_index = which(covars$SEX == "M")
female_index = which(covars$SEX == "F")
female_pheno = pheno$V1[female_index]
male_pheno = pheno$V1[men_index]
men_index
mean(female_pheno)
mean(male_pheno)  # 5.788613
t.test(male_pheno, female_pheno)
var(female_pheno)
var(male_pheno)  #
|
|
# Percentage difference between the means of two samples, expressed
# relative to the average of the two means and rounded to a whole percent.
percDifference = function (data1, data2) {
  m1 = mean(data1)
  m2 = mean(data2)
  pct = round((m1 - m2) / ((m1 + m2) / 2) * 100)
  return(pct)
}
|
|
# Percentage difference between two already-aggregated values (e.g. two
# means or two variances), relative to their average, rounded to a whole
# percent. Supersedes the vector-based version above in this session.
percDifference = function (data1, data2) {
  pct = round((data1 - data2) / ((data1 + data2) / 2) * 100)
  return(pct)
}
|
|
# BUG FIX: the original first call compared male_pheno against ITSELF
# (always 0) -- an obvious typo immediately corrected on the next line of
# the session; only the male-vs-female comparison is kept.
percDifference(mean(male_pheno), mean(female_pheno))
# Compare male/female Townsend deprivation scores.
female_townsend = covars$TOWNSEND[female_index]
male_townsend = covars$TOWNSEND[men_index]
mean(female_townsend)
mean(male_townsend)  # -1.655731
var(female_townsend)
var(male_townsend)
t.test(male_townsend, female_townsend)
percDifference(var(male_pheno), var(female_pheno))  # FIS: 4
|
|
# Height phenotype: same male/female comparison as for FIS.
baseLoc = "C:/0Datasets/ukbb/height/"
phenoLoc = paste0(baseLoc, "pheno")
covarsLoc = paste0(baseLoc, "covariates")
pheno = read.table(phenoLoc, header = FALSE)
covars = read.table(covarsLoc, header = TRUE)
men_index = which(covars$SEX == "M")
female_index = which(covars$SEX == "F")
female_pheno = pheno$V1[female_index]
male_pheno = pheno$V1[men_index]
mean(female_pheno)  # FIS: 5.788613
mean(male_pheno)    # FIS: 5.995717
percDifference(mean(male_pheno), mean(female_pheno))  # FIS: 4
t.test(male_pheno, female_pheno)  # t = 22.523, df = 184654, p-value < 2.2e-16
var(female_pheno)  # FIS: 3.778955
var(male_pheno)    # FIS: 4.301227
percDifference(var(male_pheno), var(female_pheno))  # FIS: 13
|
|
# BMI phenotype: same male/female comparison (comments carry forward the
# FIS and Height values for side-by-side reference).
baseLoc = "C:/0Datasets/ukbb/bmi/"
phenoLoc = paste0(baseLoc, "pheno")
covarsLoc = paste0(baseLoc, "covariates")
pheno = read.table(phenoLoc, header = FALSE)
covars = read.table(covarsLoc, header = TRUE)
men_index = which(covars$SEX == "M")
female_index = which(covars$SEX == "F")
female_pheno = pheno$V1[female_index]
male_pheno = pheno$V1[men_index]
mean(female_pheno)  # FIS: 5.788613 # 162.6658
mean(male_pheno)    # FIS: 5.995717 # 175.9021
percDifference(mean(male_pheno), mean(female_pheno))  # FIS: 4, Height: 8
t.test(male_pheno, female_pheno)  # FIS/Height: p-value < 2.2e-16
var(female_pheno)  # FIS: 3.778955, Height: 38.63668
var(male_pheno)    # FIS: 4.301227, 45.66119
percDifference(var(male_pheno), var(female_pheno))  # FIS: 13, Height: 17
var(female_townsend)  # 7.647166
var(male_townsend)    # 7.98267 # variance is greater for men
mean(female_townsend) # -1.655731

# Compare male/female Townsend scores (recomputed with the current indices).
female_townsend = covars$TOWNSEND[female_index]
male_townsend = covars$TOWNSEND[men_index]
mean(female_townsend)  # -1.655731
mean(male_townsend)    # -1.658059
t.test(male_townsend, female_townsend)  # p-value = 0.8553
var(female_townsend)  # 7.647166
var(male_townsend)    # 7.98267 # variance is greater for men
hist(male_pheno)
hist(female_pheno)
hist(male_pheno)
hist(female_pheno)
|
|
# Point R at the Rtools toolchain and verify make is on the PATH.
writeLines('PATH="${RTOOLS40_HOME}\\usr\\bin;${PATH}"', con = "~/.Renviron")
Sys.which("make")
library("devtools")
library(roxygen2)
# 1) set the working directory to the package root
#setwd("C:/Users/mk23/GoogleDrive_phd/PHD/!Publications/shaPRS/R_package_303021/shaPRS")
setwd("C:/Users/mk23/GoogleDrive_Cam/0Publications/shaPRS/R_package/shaPRS")
#install_github("jdstorey/qvalue")
#install_github("mkelcb/shaprs")
# Declare package dependencies in DESCRIPTION.
usethis::use_package("qvalue")
usethis::use_package("Matrix")
usethis::use_package("compiler")
# Iterate: regenerate docs, then run R CMD check (repeated during development).
document()
check(cran = TRUE)
document()
check(cran = TRUE)
document()
check(cran = TRUE)
document()
check(cran = TRUE)
|
|
# 1. load the example phenotype summary statistics shipped with the package
subphenoLoc = 'inst/extdata/phenoA_sumstats'
CombinedPhenoLoc = 'inst/extdata/Combined_sumstats'
blendFactorLoc = 'inst/extdata/myOutput_SNP_lFDR'
subpheno = read.table(subphenoLoc, header = TRUE)
CombinedPheno = read.table(CombinedPhenoLoc, header = TRUE)
blendingFactors = read.table(blendFactorLoc, header = FALSE)

# 1. load phenos again, this time pairing pheno A with pheno B
subphenoLoc = 'inst/extdata/phenoA_sumstats'
subpheno_otherLoc = 'inst/extdata/phenoB_sumstats'
blendFactorLoc = 'inst/extdata/myOutput_SNP_lFDR'
subpheno = read.table(subphenoLoc, header = TRUE)
subpheno_other = read.table(subpheno_otherLoc, header = TRUE)
blendingFactors = read.table(blendFactorLoc, header = FALSE)
View(subpheno)
View(subpheno_other)
View(subpheno)
View(blendingFactors)
# The lFDR file does have a header row; re-read it correctly.
blendingFactors = read.table(blendFactorLoc, header = TRUE)
View(blendingFactors)

# 1. Merge the 3 tables together by RSid first, so they are always aligned
# (x = subpheno, y = the other pheno): this ensures that when we check
# allele alignment we are comparing the same SNPs.
subpheno_otherPheno = merge(subpheno, subpheno_other, by.x = "SNP", by.y = "SNP")
subpheno_otherPheno_blending = merge(subpheno_otherPheno, blendingFactors, by.x = "SNP", by.y = "SNP")
document()
check(cran = TRUE)
|
|
# Build the package
build()
# Test a local install
install()
#install_github("jdstorey/qvalue")
install_github("mkelcb/shaprs")
library("devtools")
# Test a local install again
install()
library("shaPRS")

# II) tests
?shaPRS_adjust
inputDataLoc <- system.file("extdata", "shapersToydata.txt", package = "shaPRS")
inputData = read.table(inputDataLoc, header = TRUE)
results = shaPRS_adjust(inputData, thresholds = c(0.5, 0.99))

# Test the LD reference blend
?LDRefBlend
sumstatsData = readRDS(file = system.file("extdata", "sumstatsData_toy.rds", package = "shaPRS"))
# Read the SNP map files (the same toy data serves for the example).
pop1_map_rds = readRDS(file = system.file("extdata", "my_data.rds", package = "shaPRS"))
pop2_map_rds = readRDS(file = system.file("extdata", "my_data2.rds", package = "shaPRS"))
# Use chromosome 21 as the example.
chromNum = 21
# Load the chromosome's LD matrix for each population (same toy data).
pop1LDmatrix = readRDS(file = system.file("extdata", "LDref.rds", package = "shaPRS"))
pop2LDmatrix = readRDS(file = system.file("extdata", "LDref2.rds", package = "shaPRS"))

# 2. Grab the RSids from the map for the SNPs on this chromosome.
# Each LD matrix potentially covers a different subset of SNPs.
# This is guaranteed to be in the same order as pop1LDmatrix:
pop1_chrom_SNPs = pop1_map_rds[which(pop1_map_rds$chr == chromNum), ]
# ...and this in the same order as pop2LDmatrix:
pop2_chrom_SNPs = pop2_map_rds[which(pop2_map_rds$chr == chromNum), ]
pop1_chrom_SNPs$pop1_id = 1:nrow(pop1_chrom_SNPs)
pop2_chrom_SNPs$pop2_id = 1:nrow(pop2_chrom_SNPs)

# Intersect the two SNP lists (merge) so we only keep SNPs common to both
# LD matrices.
chrom_SNPs_df <- merge(pop1_chrom_SNPs, pop2_chrom_SNPs, by = "rsid")
# Align the strands of the two LD matrices.
chrom_SNPs_df = alignStrands(chrom_SNPs_df, A1.x = "a1.x", A2.x = "a0.x", A1.y = "a1.y", A2.y = "a0.y")
# Align the summary statistics for phenotypes A and B.
sumstatsData = alignStrands(sumstatsData)
# Subset the sumstats to the same chromosome.
sumstatsData = sumstatsData[which(sumstatsData$CHR == chromNum), ]
# Merge the sumstats with the common LD map data.
sumstatsData <- merge(chrom_SNPs_df, sumstatsData, by.x = "rsid", by.y = "SNP")
# Remove duplicates.
sumstatsData = sumstatsData[!duplicated(sumstatsData$rsid), ]
# Use the LD matrices' effect alleles for the sumstats: we are aligning the
# LD matrices against each other, not against the summary stats. Only the
# lFDR/SE are taken from the sumstats, and those are directionless, so they
# do not need to be re-aligned.
sumstatsData$A1.x = sumstatsData$a1.x
sumstatsData$A1.y = sumstatsData$a1.y
# Order the sumstats the same way as the LD matrix.
sumstatsData = sumstatsData[order(sumstatsData$pop1_id), ]
# Subset both LD matrices to the SNPs we actually have.
pop1LDmatrix = pop1LDmatrix[sumstatsData$pop1_id, sumstatsData$pop1_id]
pop2LDmatrix = pop2LDmatrix[sumstatsData$pop2_id, sumstatsData$pop2_id]
# Generate the blended LD matrix.
cormat = LDRefBlend(pop1LDmatrix, pop2LDmatrix, sumstatsData)
View(cormat)
uninstall()
|
|
# II) tests
?shaPRS_adjust
results$lFDRTable
# Test installing from the remote repository.
install_github("mkelcb/shaprs")
library("shaPRS")
# Test blending
?shaPRS_blend_overlap
# Test blending
?shaPRS_blend_overlap
# Test blending
?shaPRS_blend_overlap
subphenoLoc <- system.file("extdata", "phenoA_sumstats", package = "shaPRS")
subpheno_otherLoc <- system.file("extdata", "phenoB_sumstats", package = "shaPRS")
blendFactorLoc <- system.file("extdata", "myOutput_SNP_lFDR", package = "shaPRS")
subpheno = read.table(subphenoLoc, header = TRUE)
subpheno_other = read.table(subpheno_otherLoc, header = TRUE)
blendingFactors = read.table(blendFactorLoc, header = TRUE)
blendedSumstats = shaPRS_blend_overlap(subpheno, subpheno_other, blendingFactors)
View(blendedSumstats)
typeof(cormat)
# Match the chromosome map to the SNPs actually present in the sumstats.
map_rds_new = pop1_map_rds[which(pop1_map_rds$chr == chromNum), ]
map_rds_new2 = map_rds_new[which(map_rds_new$rsid %in% sumstatsData$rsid), ]
View(map_rds_new2)
document()
check(cran = TRUE)
# Build the package
build()
# Test installing from the remote repository.
install_github("mkelcb/shaprs")
uninstall()
# Test installing from the remote repository.
install_github("mkelcb/shaprs")
library("shaPRS")
# Test the LD reference blend
?LDRefBlend
LDRefBlend
# Test the LD reference blend
?LDRefBlend
uninstall()