Diff of /Functions.R [000000] .. [81de4e]

Switch to unified view

a b/Functions.R
1
# 02/07/11 -- Helper functions.
2
3
GoodnessOfFit <- function(x,type){
4
  ns <- NullModel(x,type=type)$n
5
  return(sum(((x-ns)^2)/ns, na.rm=TRUE))
6
}
7
8
9
10
ColorDendrogram <- function(hc, y, main = "", branchlength = 0.7, labels = NULL, xlab = NULL, sub = NULL, ylab = "", cex.main = NULL){
11
  if (is.null(labels))
12
  labels <- rep("", length(y))
13
  plot(hc, hang = 0.2, main = main, labels = labels, xlab = xlab,sub = sub, ylab = ylab, cex.main = cex.main)
14
  y <- y[hc$ord]
15
  if (is.numeric(y)) {
16
    y <- y + 1
17
    y[y == 7] <- "orange"
18
  }
19
  for (i in 1:length(hc$ord)) {
20
    o = hc$merge[, 1] == -hc$ord[i] | hc$merge[, 2] == -hc$ord[i]
21
    segments(i, hc$he[o] - branchlength, i, hc$he[o], col = y[i])
22
  }
23
}
24
        
25
26
27
permute.rows <- function(x){
28
  dd <- dim(x)
29
  n <- dd[1]
30
  p <- dd[2]
31
  mm <- runif(length(x)) + rep(seq(n) * 10, rep(p, n))
32
  matrix(t(x)[order(mm)], n, p, byrow = T)
33
}
34
35
36
37
balanced.folds <- function(y, nfolds = min(min(table(y)), 10)){
38
  totals <- table(y)
39
  fmax <- max(totals)
40
  nfolds <- min(nfolds, fmax)
41
  # makes no sense to have more folds than the max class size
42
  folds <- as.list(seq(nfolds))
43
  yids <- split(seq(y), y)
44
  # nice way to get the ids in a list, split by class
45
  ###Make a big matrix, with enough rows to get in all the folds per class
46
  bigmat <- matrix(NA, ceiling(fmax/nfolds) * nfolds, length(totals))
47
  for(i in seq(totals)) {
48
    bigmat[seq(totals[i]), i] <- sample(yids[[i]])
49
  }
50
  smallmat <- matrix(bigmat, nrow = nfolds) # reshape the matrix
51
  ### Now do a clever sort to mix up the NAs
52
  smallmat <- permute.rows(t(smallmat)) ### Now a clever unlisting
53
  x <- apply(smallmat, 2, function(x) x[!is.na(x)])
54
   if(is.matrix(x)){
55
    xlist <- list()
56
    for(i in 1:ncol(x)){
57
      xlist[[i]] <- x[,i]
58
    }
59
    return(xlist)
60
  }
61
  return(x)
62
}
63
64
65
66
se <- function(vec){
67
  return(sd(vec)/sqrt(length(vec)))
68
}
69
70
71
72
Soft <- function(x,a){
73
  return(sign(x)*pmax(abs(x)-a,0))
74
}
75
76
77
78
GetD <- function(ns, x, y, rho,beta,rhos=NULL){
79
  if(!is.null(rho) && !is.null(rhos)) stop("do you want to use rho or rhos in GetD function???")
80
  if(is.null(rhos)){
81
    uniq <- sort(unique(y))
82
    ds <- matrix(1, nrow=length(uniq), ncol=ncol(x))
83
    for(k in 1:length(uniq)){
84
      tmp1 <- x[y==uniq[k],]
85
      tmp2 <- ns[y==uniq[k],]
86
      if(class(tmp1) == "numeric"){
87
        tmp1 = matrix(tmp1, nrow = 1)
88
        colnames(tmp1) <- colnames(x)
89
      }
90
      if(class(tmp2) == "numeric"){
91
        tmp2 = matrix(tmp2, nrow = 1)
92
        colnames(tmp2) <- colnames(ns)
93
      }
94
      a <- colSums(tmp1)+beta
95
      b <- colSums(tmp2)+beta
96
      
97
      ds[k,] <- 1+Soft(a/b-1,rho/sqrt(b))
98
    }
99
    return(ds)
100
  } else {
101
    uniq <- sort(unique(y))
102
    ds.list <- list()
103
    for(rho in rhos){
104
      ds <- matrix(1, nrow=length(uniq), ncol=ncol(x))
105
      for(k in 1:length(uniq)){
106
        tmp1 <- x[y==uniq[k],]
107
        tmp2 <- ns[y==uniq[k],]
108
        if(class(tmp1) == "numeric"){
109
          tmp1 = matrix(tmp1, nrow = 1)
110
          colnames(tmp1) <- colnames(x)
111
        }
112
        if(class(tmp2) == "numeric"){
113
          tmp2 = matrix(tmp2, nrow = 1)
114
          colnames(tmp2) <- colnames(ns)
115
        }
116
        
117
        a <- colSums(tmp1)+beta
118
        b <- colSums(tmp2)+beta
119
        ds[k,] <- 1+Soft(a/b-1,rho/sqrt(b))
120
      }
121
      ds.list[[which(rhos==rho)]] <- ds
122
    }
123
    return(ds.list)
124
  }
125
}
126
127
128
129
print.poicla <- function(x,...){
130
  if(is.null(x$rhos) && is.null(x$rho)){
131
    if(!is.null(x[[1]]$alpha)) cat("Value of alpha used to transform data: ", x[[1]]$alpha, fill=TRUE)
132
    if(is.null(x[[1]]$alpha)) cat("No transformation performed.", fill=TRUE)
133
    cat("Type of normalization performed: ", x[[1]]$type, fill=TRUE)
134
    cat("Number of training observations: ", nrow(x[[1]]$x), fill=TRUE)
135
    if(!is.null(x[[1]]$xte)) cat("Number of test observations: ", nrow(x[[1]]$xte), fill=TRUE)
136
    cat("Number of features: ", ncol(x[[1]]$x), fill=TRUE)
137
    cat("-------------------------", fill=TRUE)
138
    cat("-------------------------", fill=TRUE)
139
    for(i in 1:length(x)){
140
      cat("-------------------------", fill=TRUE)
141
      cat("Value of rho used: ", x[[i]]$rho, fill=TRUE)
142
      cat("Number of features used in classifier: ", sum(apply(x[[i]]$ds!=1, 2, sum)!=0), fill=TRUE)
143
      if(sum(apply(x[[i]]$ds!=1, 2, sum)!=0)<100) cat("Indices of features used in classifier: ", which(apply(x[[i]]$ds!=1, 2, sum)!=0), fill=TRUE)
144
    }
145
  } else {
146
    if(!is.null(x$alpha)) cat("Value of alpha used to transform data: ", x$alpha, fill=TRUE)
147
    if(is.null(x$alpha)) cat("No transformation performed.", fill=TRUE)    
148
    cat("Type of normalization performed: ", x$type, fill=TRUE)    
149
    cat("Number of training observations: ", nrow(x$x), fill=TRUE)
150
    if(!is.null(x$xte)) cat("Number of test observations: ", nrow(x$xte), fill=TRUE)
151
    cat("Number of features: ", ncol(x$x), fill=TRUE)
152
    cat("Value of rho used: ", x$rho, fill=TRUE)
153
    cat("Number of features used in classifier: ", sum(apply(x$ds!=1, 2, sum)!=0), fill=TRUE)
154
    if(sum(apply(x$ds!=1, 2, sum)!=0)<100) cat("Indices of features used in classifier: ", which(apply(x$ds!=1, 2, sum)!=0), fill=TRUE)
155
  }
156
}