Diff of /R/utilities.R [000000] .. [727782]

Switch to unified view

a b/R/utilities.R
1
get_AICtab<-function(fit){
2
  
3
  ########################
4
  # Flag invalid options #
5
  ########################
6
  
7
  if (!class(fit) %in% c('cpglm', 'zcpglm', 'glmmTMB')){
8
    stop('Not supported. Valid options are cplm , zcpglm, and glmmTMB')
9
  }
10
  
11
  ######################
12
  #  Initialize AICtab #
13
  ######################
14
  
15
  AICtab<-rep(NA, 5)
16
  
17
  ###########################
18
  # Case-by-Case Extraction #
19
  ###########################
20
  
21
  if (class(fit)=='cpglm'){
22
    
23
    ##########################################
24
    # Back calculate logLik and BIC from AIC #
25
    ##########################################
26
    
27
    AIC<-fit$aic
28
    AIC_multiplier<-length(fit$y) - fit$df.residual
29
    logLik<-(AIC - 2*AIC_multiplier)/2
30
    BIC_multiplier<-AIC_multiplier*log(length(fit$y))
31
    BIC<-BIC_multiplier + 2*logLik
32
    deviance<-fit$deviance
33
    df.resid<-fit$df.residual
34
    
35
    # Coherent output
36
    AICtab<-c(AIC, BIC, logLik, deviance, df.resid)
37
  }
38
  
39
  if (class(fit)=='zcpglm'){
40
    
41
    ##########################################
42
    # Back calculate AIC and BIC from logLik #
43
    ##########################################
44
    
45
    logLik<--fit$llik
46
    AIC_multiplier<-length(fit$y) - fit$df.residual
47
    BIC_multiplier<-AIC_multiplier*log(length(fit$y))
48
    AIC<-2*AIC_multiplier + 2*logLik
49
    BIC<-BIC_multiplier + 2*logLik
50
    deviance<-NA
51
    df.resid<-fit$df.residual
52
    
53
    # Coherent output
54
    AICtab<-c(AIC, BIC, logLik, deviance, df.resid)
55
    
56
  }
57
  
58
  if (class(fit)=='glmmTMB'){
59
    
60
    #######################################
61
    # Extract AICtab from glmmTMB objects #
62
    #######################################
63
    
64
    AICtab<-summary(fit)["AICtab"]$AICtab
65
    
66
  }
67
  
68
  ##########
69
  # Return #
70
  ##########
71
  
72
  names(AICtab)<-c('AIC', 'BIC', 'logLik', 'deviance', 'df.resid')
73
  return(AICtab)
74
}
75
76
77
78
79
# Adapted form: https://rstudio-pubs-static.s3.amazonaws.com/455435_30729e265f7a4d049400d03a18e218db.html
80
81
#' @export
82
entropy <- function(target) {
83
  #if(all(is.na(target)))  0 
84
  freq <- table(target)/length(target)
85
  # vectorize
86
  vec <- as.data.frame(freq)[,2]
87
  #drop 0 to avoid NaN resulting from log2
88
  vec<-vec[vec>0]
89
  #compute entropy
90
  -sum(vec * log2(vec))
91
}
92
93
IG_numeric<-function(data, feature, target, bins=4) {
94
  #Strip out rows where feature is NA
95
  data<-data[!is.na(data[,feature]),]
96
  #compute entropy for the parent
97
  e0<-entropy(data[,target])
98
  
99
  data$cat<-cut(data[,feature], breaks=bins, labels=c(1:bins))
100
  
101
  #use dplyr to compute e and p for each value of the feature
102
  dd_data <- data %>% group_by(cat) %>% summarise(e=entropy(get(target)), 
103
                                                  n=length(get(target)),
104
                                                  min=min(get(feature)),
105
                                                  max=max(get(feature))
106
  )
107
  
108
  #calculate p for each value of feature
109
  dd_data$p<-dd_data$n/nrow(data)
110
  #compute IG
111
  IG<-e0-sum(dd_data$p*dd_data$e)
112
  
113
  return(IG)
114
}
115
116
117
118
#returns IG for categorical variables.
119
IG_cat<-function(data,feature,target){
120
  #Strip out rows where feature is NA
121
  data<-data[!is.na(data[,feature]),] 
122
  #use dplyr to compute e and p for each value of the feature
123
  dd_data <- data %>% group_by_at(feature) %>% summarise(e=entropy(get(target)), 
124
                                                         n=length(get(target))
125
  )
126
  
127
  #compute entropy for the parent
128
  e0<-entropy(data[,target])
129
  #calculate p for each value of feature
130
  dd_data$p<-dd_data$n/nrow(data)
131
  #compute IG
132
  IG<-e0-sum(dd_data$p*dd_data$e)
133
  
134
  return(IG)
135
}
136
137
# entropy (c("A", "A", "A", "A", "A", "B", "B"))
138
# 0.8631206
139
140
#entropy (c("A", "A", "A", "A"))
141
# 0
142
143
#entropy (c("A", "A", "A", "A", "B", "B", "B", "B"))
144
#1
145
146
#entropy (c("C", "A", "A", "A", "B", "B", "B", "B"))
147
# 1.405639
148
149
#entropy (c("C", "A", "D", "A", "B", "B", "B", "B"))
150
# 1.75
151
152
# entropy (c(1, 1, 2, 1, 1, 1, 2, 1))
153
#0.8112781
154
155
156
# Written by Grace
157
extractAssay <- function(input, assay_name = "counts") {
158
  
159
  # Extract assay name based on the user input
160
  if ("counts" %in% assayNames(input)) {
161
    counts_data <- assay(input, assay_name)
162
    cat("The specified assay has been extracted\n")
163
    return(as.data.frame(as.matrix(counts_data)))
164
  } else {
165
    cat("The specified assay was not found\n")
166
    return(NULL)
167
  }
168
}