Tweedieverse / Git / Diff of /R/utilities.R

Models:
AlyssaS/
Tweedieverse
Downloads: 1
Diff of /R/utilities.R [000000] .. [727782]
Switch to side-by-side view

--- a
+++ b/R/utilities.R
@@ -0,0 +1,168 @@
+get_AICtab<-function(fit){
+  
+  ########################
+  # Flag invalid options #
+  ########################
+  
+  if (!class(fit) %in% c('cpglm', 'zcpglm', 'glmmTMB')){
+    stop('Not supported. Valid options are cplm , zcpglm, and glmmTMB')
+  }
+  
+  ######################
+  #  Initialize AICtab #
+  ######################
+  
+  AICtab<-rep(NA, 5)
+  
+  ###########################
+  # Case-by-Case Extraction #
+  ###########################
+  
+  if (class(fit)=='cpglm'){
+    
+    ##########################################
+    # Back calculate logLik and BIC from AIC #
+    ##########################################
+    
+    AIC<-fit$aic
+    AIC_multiplier<-length(fit$y) - fit$df.residual
+    logLik<-(AIC - 2*AIC_multiplier)/2
+    BIC_multiplier<-AIC_multiplier*log(length(fit$y))
+    BIC<-BIC_multiplier + 2*logLik
+    deviance<-fit$deviance
+    df.resid<-fit$df.residual
+    
+    # Coherent output
+    AICtab<-c(AIC, BIC, logLik, deviance, df.resid)
+  }
+  
+  if (class(fit)=='zcpglm'){
+    
+    ##########################################
+    # Back calculate AIC and BIC from logLik #
+    ##########################################
+    
+    logLik<--fit$llik
+    AIC_multiplier<-length(fit$y) - fit$df.residual
+    BIC_multiplier<-AIC_multiplier*log(length(fit$y))
+    AIC<-2*AIC_multiplier + 2*logLik
+    BIC<-BIC_multiplier + 2*logLik
+    deviance<-NA
+    df.resid<-fit$df.residual
+    
+    # Coherent output
+    AICtab<-c(AIC, BIC, logLik, deviance, df.resid)
+    
+  }
+  
+  if (class(fit)=='glmmTMB'){
+    
+    #######################################
+    # Extract AICtab from glmmTMB objects #
+    #######################################
+    
+    AICtab<-summary(fit)["AICtab"]$AICtab
+    
+  }
+  
+  ##########
+  # Return #
+  ##########
+  
+  names(AICtab)<-c('AIC', 'BIC', 'logLik', 'deviance', 'df.resid')
+  return(AICtab)
+}
+
+
+
+
+# Adapted form: https://rstudio-pubs-static.s3.amazonaws.com/455435_30729e265f7a4d049400d03a18e218db.html
+
+#' @export
+entropy <- function(target) {
+  #if(all(is.na(target)))  0 
+  freq <- table(target)/length(target)
+  # vectorize
+  vec <- as.data.frame(freq)[,2]
+  #drop 0 to avoid NaN resulting from log2
+  vec<-vec[vec>0]
+  #compute entropy
+  -sum(vec * log2(vec))
+}
+
+IG_numeric<-function(data, feature, target, bins=4) {
+  #Strip out rows where feature is NA
+  data<-data[!is.na(data[,feature]),]
+  #compute entropy for the parent
+  e0<-entropy(data[,target])
+  
+  data$cat<-cut(data[,feature], breaks=bins, labels=c(1:bins))
+  
+  #use dplyr to compute e and p for each value of the feature
+  dd_data <- data %>% group_by(cat) %>% summarise(e=entropy(get(target)), 
+                                                  n=length(get(target)),
+                                                  min=min(get(feature)),
+                                                  max=max(get(feature))
+  )
+  
+  #calculate p for each value of feature
+  dd_data$p<-dd_data$n/nrow(data)
+  #compute IG
+  IG<-e0-sum(dd_data$p*dd_data$e)
+  
+  return(IG)
+}
+
+
+
+#returns IG for categorical variables.
+IG_cat<-function(data,feature,target){
+  #Strip out rows where feature is NA
+  data<-data[!is.na(data[,feature]),] 
+  #use dplyr to compute e and p for each value of the feature
+  dd_data <- data %>% group_by_at(feature) %>% summarise(e=entropy(get(target)), 
+                                                         n=length(get(target))
+  )
+  
+  #compute entropy for the parent
+  e0<-entropy(data[,target])
+  #calculate p for each value of feature
+  dd_data$p<-dd_data$n/nrow(data)
+  #compute IG
+  IG<-e0-sum(dd_data$p*dd_data$e)
+  
+  return(IG)
+}
+
+# entropy (c("A", "A", "A", "A", "A", "B", "B"))
+# 0.8631206
+
+#entropy (c("A", "A", "A", "A"))
+# 0
+
+#entropy (c("A", "A", "A", "A", "B", "B", "B", "B"))
+#1
+
+#entropy (c("C", "A", "A", "A", "B", "B", "B", "B"))
+# 1.405639
+
+#entropy (c("C", "A", "D", "A", "B", "B", "B", "B"))
+# 1.75
+
+# entropy (c(1, 1, 2, 1, 1, 1, 2, 1))
+#0.8112781
+
+
+# Written by Grace
+extractAssay <- function(input, assay_name = "counts") {
+  
+  # Extract assay name based on the user input
+  if ("counts" %in% assayNames(input)) {
+    counts_data <- assay(input, assay_name)
+    cat("The specified assay has been extracted\n")
+    return(as.data.frame(as.matrix(counts_data)))
+  } else {
+    cat("The specified assay was not found\n")
+    return(NULL)
+  }
+}