|
a |
|
b/Functions.R |
|
|
1 |
# 02/07/11 -- Helper functions. |
|
|
2 |
|
|
|
3 |
GoodnessOfFit <- function(x,type){ |
|
|
4 |
ns <- NullModel(x,type=type)$n |
|
|
5 |
return(sum(((x-ns)^2)/ns, na.rm=TRUE)) |
|
|
6 |
} |
|
|
7 |
|
|
|
8 |
|
|
|
9 |
|
|
|
10 |
ColorDendrogram <- function(hc, y, main = "", branchlength = 0.7, labels = NULL, xlab = NULL, sub = NULL, ylab = "", cex.main = NULL){ |
|
|
11 |
if (is.null(labels)) |
|
|
12 |
labels <- rep("", length(y)) |
|
|
13 |
plot(hc, hang = 0.2, main = main, labels = labels, xlab = xlab,sub = sub, ylab = ylab, cex.main = cex.main) |
|
|
14 |
y <- y[hc$ord] |
|
|
15 |
if (is.numeric(y)) { |
|
|
16 |
y <- y + 1 |
|
|
17 |
y[y == 7] <- "orange" |
|
|
18 |
} |
|
|
19 |
for (i in 1:length(hc$ord)) { |
|
|
20 |
o = hc$merge[, 1] == -hc$ord[i] | hc$merge[, 2] == -hc$ord[i] |
|
|
21 |
segments(i, hc$he[o] - branchlength, i, hc$he[o], col = y[i]) |
|
|
22 |
} |
|
|
23 |
} |
|
|
24 |
|
|
|
25 |
|
|
|
26 |
|
|
|
27 |
permute.rows <- function(x){ |
|
|
28 |
dd <- dim(x) |
|
|
29 |
n <- dd[1] |
|
|
30 |
p <- dd[2] |
|
|
31 |
mm <- runif(length(x)) + rep(seq(n) * 10, rep(p, n)) |
|
|
32 |
matrix(t(x)[order(mm)], n, p, byrow = T) |
|
|
33 |
} |
|
|
34 |
|
|
|
35 |
|
|
|
36 |
|
|
|
37 |
balanced.folds <- function(y, nfolds = min(min(table(y)), 10)){ |
|
|
38 |
totals <- table(y) |
|
|
39 |
fmax <- max(totals) |
|
|
40 |
nfolds <- min(nfolds, fmax) |
|
|
41 |
# makes no sense to have more folds than the max class size |
|
|
42 |
folds <- as.list(seq(nfolds)) |
|
|
43 |
yids <- split(seq(y), y) |
|
|
44 |
# nice way to get the ids in a list, split by class |
|
|
45 |
###Make a big matrix, with enough rows to get in all the folds per class |
|
|
46 |
bigmat <- matrix(NA, ceiling(fmax/nfolds) * nfolds, length(totals)) |
|
|
47 |
for(i in seq(totals)) { |
|
|
48 |
bigmat[seq(totals[i]), i] <- sample(yids[[i]]) |
|
|
49 |
} |
|
|
50 |
smallmat <- matrix(bigmat, nrow = nfolds) # reshape the matrix |
|
|
51 |
### Now do a clever sort to mix up the NAs |
|
|
52 |
smallmat <- permute.rows(t(smallmat)) ### Now a clever unlisting |
|
|
53 |
x <- apply(smallmat, 2, function(x) x[!is.na(x)]) |
|
|
54 |
if(is.matrix(x)){ |
|
|
55 |
xlist <- list() |
|
|
56 |
for(i in 1:ncol(x)){ |
|
|
57 |
xlist[[i]] <- x[,i] |
|
|
58 |
} |
|
|
59 |
return(xlist) |
|
|
60 |
} |
|
|
61 |
return(x) |
|
|
62 |
} |
|
|
63 |
|
|
|
64 |
|
|
|
65 |
|
|
|
66 |
se <- function(vec){ |
|
|
67 |
return(sd(vec)/sqrt(length(vec))) |
|
|
68 |
} |
|
|
69 |
|
|
|
70 |
|
|
|
71 |
|
|
|
72 |
Soft <- function(x,a){ |
|
|
73 |
return(sign(x)*pmax(abs(x)-a,0)) |
|
|
74 |
} |
|
|
75 |
|
|
|
76 |
|
|
|
77 |
|
|
|
78 |
GetD <- function(ns, x, y, rho,beta,rhos=NULL){ |
|
|
79 |
if(!is.null(rho) && !is.null(rhos)) stop("do you want to use rho or rhos in GetD function???") |
|
|
80 |
if(is.null(rhos)){ |
|
|
81 |
uniq <- sort(unique(y)) |
|
|
82 |
ds <- matrix(1, nrow=length(uniq), ncol=ncol(x)) |
|
|
83 |
for(k in 1:length(uniq)){ |
|
|
84 |
tmp1 <- x[y==uniq[k],] |
|
|
85 |
tmp2 <- ns[y==uniq[k],] |
|
|
86 |
if(class(tmp1) == "numeric"){ |
|
|
87 |
tmp1 = matrix(tmp1, nrow = 1) |
|
|
88 |
colnames(tmp1) <- colnames(x) |
|
|
89 |
} |
|
|
90 |
if(class(tmp2) == "numeric"){ |
|
|
91 |
tmp2 = matrix(tmp2, nrow = 1) |
|
|
92 |
colnames(tmp2) <- colnames(ns) |
|
|
93 |
} |
|
|
94 |
a <- colSums(tmp1)+beta |
|
|
95 |
b <- colSums(tmp2)+beta |
|
|
96 |
|
|
|
97 |
ds[k,] <- 1+Soft(a/b-1,rho/sqrt(b)) |
|
|
98 |
} |
|
|
99 |
return(ds) |
|
|
100 |
} else { |
|
|
101 |
uniq <- sort(unique(y)) |
|
|
102 |
ds.list <- list() |
|
|
103 |
for(rho in rhos){ |
|
|
104 |
ds <- matrix(1, nrow=length(uniq), ncol=ncol(x)) |
|
|
105 |
for(k in 1:length(uniq)){ |
|
|
106 |
tmp1 <- x[y==uniq[k],] |
|
|
107 |
tmp2 <- ns[y==uniq[k],] |
|
|
108 |
if(class(tmp1) == "numeric"){ |
|
|
109 |
tmp1 = matrix(tmp1, nrow = 1) |
|
|
110 |
colnames(tmp1) <- colnames(x) |
|
|
111 |
} |
|
|
112 |
if(class(tmp2) == "numeric"){ |
|
|
113 |
tmp2 = matrix(tmp2, nrow = 1) |
|
|
114 |
colnames(tmp2) <- colnames(ns) |
|
|
115 |
} |
|
|
116 |
|
|
|
117 |
a <- colSums(tmp1)+beta |
|
|
118 |
b <- colSums(tmp2)+beta |
|
|
119 |
ds[k,] <- 1+Soft(a/b-1,rho/sqrt(b)) |
|
|
120 |
} |
|
|
121 |
ds.list[[which(rhos==rho)]] <- ds |
|
|
122 |
} |
|
|
123 |
return(ds.list) |
|
|
124 |
} |
|
|
125 |
} |
|
|
126 |
|
|
|
127 |
|
|
|
128 |
|
|
|
129 |
print.poicla <- function(x,...){ |
|
|
130 |
if(is.null(x$rhos) && is.null(x$rho)){ |
|
|
131 |
if(!is.null(x[[1]]$alpha)) cat("Value of alpha used to transform data: ", x[[1]]$alpha, fill=TRUE) |
|
|
132 |
if(is.null(x[[1]]$alpha)) cat("No transformation performed.", fill=TRUE) |
|
|
133 |
cat("Type of normalization performed: ", x[[1]]$type, fill=TRUE) |
|
|
134 |
cat("Number of training observations: ", nrow(x[[1]]$x), fill=TRUE) |
|
|
135 |
if(!is.null(x[[1]]$xte)) cat("Number of test observations: ", nrow(x[[1]]$xte), fill=TRUE) |
|
|
136 |
cat("Number of features: ", ncol(x[[1]]$x), fill=TRUE) |
|
|
137 |
cat("-------------------------", fill=TRUE) |
|
|
138 |
cat("-------------------------", fill=TRUE) |
|
|
139 |
for(i in 1:length(x)){ |
|
|
140 |
cat("-------------------------", fill=TRUE) |
|
|
141 |
cat("Value of rho used: ", x[[i]]$rho, fill=TRUE) |
|
|
142 |
cat("Number of features used in classifier: ", sum(apply(x[[i]]$ds!=1, 2, sum)!=0), fill=TRUE) |
|
|
143 |
if(sum(apply(x[[i]]$ds!=1, 2, sum)!=0)<100) cat("Indices of features used in classifier: ", which(apply(x[[i]]$ds!=1, 2, sum)!=0), fill=TRUE) |
|
|
144 |
} |
|
|
145 |
} else { |
|
|
146 |
if(!is.null(x$alpha)) cat("Value of alpha used to transform data: ", x$alpha, fill=TRUE) |
|
|
147 |
if(is.null(x$alpha)) cat("No transformation performed.", fill=TRUE) |
|
|
148 |
cat("Type of normalization performed: ", x$type, fill=TRUE) |
|
|
149 |
cat("Number of training observations: ", nrow(x$x), fill=TRUE) |
|
|
150 |
if(!is.null(x$xte)) cat("Number of test observations: ", nrow(x$xte), fill=TRUE) |
|
|
151 |
cat("Number of features: ", ncol(x$x), fill=TRUE) |
|
|
152 |
cat("Value of rho used: ", x$rho, fill=TRUE) |
|
|
153 |
cat("Number of features used in classifier: ", sum(apply(x$ds!=1, 2, sum)!=0), fill=TRUE) |
|
|
154 |
if(sum(apply(x$ds!=1, 2, sum)!=0)<100) cat("Indices of features used in classifier: ", which(apply(x$ds!=1, 2, sum)!=0), fill=TRUE) |
|
|
155 |
} |
|
|
156 |
} |