|
a |
|
b/R/select_rho_partial.R |
|
|
1 |
#' @title Data preprocessing for partial correlation analysis |
|
|
2 |
#' @description A method that integrates differential expression (DE) analysis |
|
|
3 |
#' and differential network (DN) analysis to select biomarker candidates for |
|
|
4 |
#' cancer studies. select_rho_partial is the pre-processing step for INDEED |
|
|
5 |
#' partial differential analysis. |
|
|
6 |
#' @param data This is a p*n dataframe that contains the expression levels for all biomolecules and samples. |
|
|
7 |
#' @param class_label This is a 1*n dataframe that contains the class label with 0 for group 1 and 1 for group 2. |
|
|
8 |
#' @param id This is a p*1 dataframe that contains the ID for each biomolecule. |
|
|
9 |
#' @param error_curve This is a boolean value indicating whether to plot the error curve (TRUE) or not (FALSE). |
|
|
10 |
#' The default is TRUE. |
|
|
11 |
#' @examples select_rho_partial(data = Met_GU, class_label = Met_Group_GU, id = Met_name_GU, |
|
|
12 |
#' error_curve = TRUE) |
|
|
13 |
#' @return A list of processed data for the next step, and an error curve to select optimal rho value |
|
|
14 |
#' for graphical lasso. |
|
|
15 |
#' @import devtools |
|
|
16 |
#' @importFrom glasso glasso |
|
|
17 |
#' @importFrom stats qnorm cor quantile var sd glm |
|
|
18 |
#' @importFrom graphics abline title plot lines |
|
|
19 |
#' @export |
|
|
20 |
|
|
|
21 |
select_rho_partial <- function(data = NULL, class_label = NULL, id = NULL, error_curve = TRUE){ |
|
|
22 |
data_bind <- rbind(data, class_label) |
|
|
23 |
# Group 1: p*n1 |
|
|
24 |
raw_group_1 <- data_bind[,data_bind[nrow(data_bind),] == 0][1:(nrow(data_bind) - 1),] |
|
|
25 |
# Group 2: p*n2 |
|
|
26 |
raw_group_2 <- data_bind[,data_bind[nrow(data_bind),] == 1][1:(nrow(data_bind) - 1),] |
|
|
27 |
p <- nrow(raw_group_2) |
|
|
28 |
n_group_1 <- ncol(raw_group_1) |
|
|
29 |
n_group_2 <- ncol(raw_group_2) |
|
|
30 |
|
|
|
31 |
|
|
|
32 |
# Z-transform the data for group-specific normalization |
|
|
33 |
data_group_1 <- scale(t(raw_group_1)) # Group 1: n1*p |
|
|
34 |
data_group_2 <- scale(t(raw_group_2)) # Group 2: n2*p |
|
|
35 |
cov_group_1 <- var(data_group_1) |
|
|
36 |
cov_group_2 <- var(data_group_2) |
|
|
37 |
|
|
|
38 |
## Apply grapihcal LASSO given data_group_1 and data_group_2 |
|
|
39 |
# Group 1 first |
|
|
40 |
n_fold <- 5 # number of folds |
|
|
41 |
rho <- exp(seq(log(1), log(0.01), length.out = 20)) |
|
|
42 |
|
|
|
43 |
# group 1 |
|
|
44 |
# draw error curve |
|
|
45 |
error_group1 <- choose_rho(data_group_1, n_fold, rho) |
|
|
46 |
if(error_curve == TRUE){ |
|
|
47 |
par(mfrow = c(1, 2)) |
|
|
48 |
plot(rho, error_group1$log.cv, xlab = expression(rho), ylab = "Error") |
|
|
49 |
lines(rho, error_group1$log.cv) |
|
|
50 |
title(main = paste("Group 1 error curve", "using cross validation", sep = "\n")) |
|
|
51 |
# chosse optimal rho |
|
|
52 |
abline(v = rho[error_group1$log.cv == min(error_group1$log.cv)], col = "red", lty = 3) |
|
|
53 |
# one standard error rule |
|
|
54 |
abline(h = min(error_group1$log.cv) + |
|
|
55 |
error_group1$log.rho[error_group1$log.cv == min(error_group1$log.cv)], |
|
|
56 |
col = "blue") |
|
|
57 |
} |
|
|
58 |
rho[error_group1$log.cv == min(error_group1$log.cv)] # rho based on minimum rule |
|
|
59 |
|
|
|
60 |
# rhos that are under blue line |
|
|
61 |
rho_under_blue <- rho[error_group1$log.cv < |
|
|
62 |
min(error_group1$log.cv) + |
|
|
63 |
error_group1$log.rho[error_group1$log.cv == |
|
|
64 |
min(error_group1$log.cv)]] |
|
|
65 |
|
|
|
66 |
# rhos that are on the right of the red line |
|
|
67 |
rho_right_red <- rho[rho > rho[error_group1$log.cv == min(error_group1$log.cv)]] |
|
|
68 |
|
|
|
69 |
#rhos that are under blue line and to the right of the red line |
|
|
70 |
rho_one_std_group1 <- max(intersect(rho_under_blue, rho_right_red )) |
|
|
71 |
rho_min_rule_group1 <- rho[error_group1$log.cv == min(error_group1$log.cv)] |
|
|
72 |
|
|
|
73 |
# group 2 |
|
|
74 |
# draw error curve |
|
|
75 |
error_group2 <- choose_rho(data_group_2, n_fold, rho) |
|
|
76 |
if(error_curve == TRUE){ |
|
|
77 |
plot(rho, error_group2$log.cv, xlab = expression(rho), ylab = "Error") |
|
|
78 |
lines(rho, error_group2$log.cv) |
|
|
79 |
title(main = paste("Group 2 error curve", "using cross validation", sep="\n")) |
|
|
80 |
# choOse optimal rho |
|
|
81 |
abline(v = rho[error_group2$log.cv == min(error_group2$log.cv)], col = "red", lty = 3) |
|
|
82 |
# one standard error rule |
|
|
83 |
abline(h = min(error_group2$log.cv) + |
|
|
84 |
error_group2$log.rho[error_group2$log.cv == |
|
|
85 |
min(error_group2$log.cv)], col = "blue") |
|
|
86 |
} |
|
|
87 |
rho[error_group2$log.cv == min(error_group2$log.cv)] # rho based on minimum rule |
|
|
88 |
|
|
|
89 |
# rhos that are under blue line |
|
|
90 |
rho_under_blue <- rho[error_group2$log.cv < min(error_group2$log.cv) + |
|
|
91 |
error_group2$log.rho[error_group2$log.cv == |
|
|
92 |
min(error_group2$log.cv)]] |
|
|
93 |
|
|
|
94 |
# rhos that are on the right of the red line |
|
|
95 |
rho_right_red <- rho[rho > rho[error_group2$log.cv == min(error_group2$log.cv)]] |
|
|
96 |
|
|
|
97 |
#rhos that are under blue line and to the right of the red line |
|
|
98 |
rho_one_std_group2 <- max(intersect(rho_under_blue, rho_right_red )) # export as global |
|
|
99 |
rho_min_rule_group2 <- rho[error_group2$log.cv == min(error_group2$log.cv)] # export as global |
|
|
100 |
|
|
|
101 |
rho_df <- data.frame(c(rho_one_std_group1, rho_min_rule_group1), c(rho_one_std_group2, |
|
|
102 |
rho_min_rule_group2), |
|
|
103 |
row.names = c("one standard error","minimum")) |
|
|
104 |
colnames(rho_df)<-c('group1', "group2") |
|
|
105 |
|
|
|
106 |
data_list <- list(p = p, rho_table = rho_df, cov_group_1 = cov_group_1, |
|
|
107 |
cov_group_2 = cov_group_2, n_group_1 = n_group_1, n_group_2 = n_group_2, |
|
|
108 |
data_group_1 = data_group_1, data_group_2 = data_group_2, |
|
|
109 |
class_label = class_label, id = id, data = data) |
|
|
110 |
return(data_list) |
|
|
111 |
} |