Diff of /partyMod/R/Variables.R [000000] .. [fbf06f]

Switch to side-by-side view

--- a
+++ b/partyMod/R/Variables.R
@@ -0,0 +1,261 @@
+
+# $Id$
+
+### factor handling
+ff_trafo <- function(x) {
+    ### temporarily define `na.pass' as na.action
+    opt <- options()
+    on.exit(options(opt))
+    options(na.action = na.pass)
+    if (nlevels(x) == 1) {
+        warning("factors at only one level may lead to problems")
+        mm <- matrix(1, nrow = length(x))
+    } else {
+        ### construct design matrix _without_ intercept
+        mm <- model.matrix(~ x - 1)
+    }
+    colnames(mm) <- levels(x)  
+    return(mm)
+}
+
+ptrafo <- function(data, numeric_trafo = id_trafo, factor_trafo = ff_trafo, 
+    ordered_trafo = of_trafo, surv_trafo = logrank_trafo, var_trafo = NULL)
+
+    trafo(data = data, numeric_trafo = numeric_trafo, factor_trafo =
+          factor_trafo, ordered_trafo = ordered_trafo, 
+          surv_trafo = surv_trafo, var_trafo = var_trafo)
+
+
+initVariableFrame.df <- function(obj, trafo = ptrafo, scores = NULL, response = FALSE, ...) {
+
+    if (is.null(trafo)) trafo <- ptrafo
+    if (response) {
+        RET <- new("ResponseFrame", nrow(obj), ncol(obj))
+        tmp <- lapply(obj, function(x) {
+            if (is.factor(x)) return(ff_trafo(x))
+            ### FIXME
+            if (inherits(x, "Surv")) return(logrank_trafo(x))
+            return(x)
+        })
+        RET@predict_trafo <- as.matrix(as.data.frame(tmp))
+        storage.mode(RET@predict_trafo) <- "double"
+    } else {
+        RET <- new("VariableFrame", nrow(obj), ncol(obj))
+    }
+    
+    is_ordinal <- sapply(obj, is.ordered)
+    is_nominal <- sapply(obj, is.factor) & !is_ordinal
+
+    ### assign user-specified scores to variables in `obj'
+    if (!is.null(scores)) {
+        if (!is.list(scores) || is.null(names(scores)))
+            stop(sQuote("scores"), " is not a named list")
+        scores <- scores[names(scores) %in% colnames(obj)]
+    }
+    if (!is.null(scores)) {
+        for (n in names(scores)) {
+            if (!(is.factor(obj[[n]]) && is.ordered(obj[[n]])) || 
+                nlevels(obj[[n]]) != length(scores[[n]]))
+                stop("cannot assign scores to variable ", sQuote(n))
+            if (any(order(scores[[n]]) != 1:length(scores[[n]])))
+                stop("scores are not increasingly ordered")
+            attr(obj[[n]], "scores") <- scores[[n]]
+        }
+    }
+
+    RET@scores <- lapply(obj, function(x) {
+        sc <- 0
+        if (is.ordered(x)) {
+            sc <- attr(x, "scores")
+            if (is.null(sc)) sc <- 1:nlevels(x)
+            storage.mode(sc) <- "double"
+        }
+        sc
+    })
+
+    ### transformations
+    jt <- trafo(obj)
+
+    ### for each variable
+    xt <- vector(mode = "list", length = ncol(obj))
+    for (i in 1:ncol(obj))
+        xt[[i]] <- jt[,attr(jt, "assign") == i, drop = FALSE]
+    rm(jt)
+
+    ### ordering
+    ordering <- lapply(obj, function(x) {
+        if (is.factor(x) && !is.ordered(x)) return(NULL)
+        if (inherits(x, "Surv")) return(NULL)
+        if (is.ordered(x)) return(as.integer(order(as.numeric(x))))
+        as.integer(order(x))
+    })
+
+    ### div.
+    levels <- lapply(obj, function(x) if(is.factor(x)) levels(x))
+    whichNA <- lapply(obj, function(x) which(is.na(x)))
+    has_missings <- sapply(obj, function(x) any(is.na(x)))
+    censored <- sapply(obj, function(x) inherits(x, "Surv"))
+
+    ### some "handwork" 
+    for (j in 1:ncol(obj)) {
+        x <- obj[[j]]
+
+        if (censored[j]) 
+            ordering[[j]] <- as.integer(order(xt[[j]]))
+
+        if (is.factor(x)) {
+            if (is_ordinal[j]) {
+                storage.mode(xt[[j]]) <- "double"
+                ### R 2.5.0 does not allow to change the storage mode of factors
+                class(obj[[j]]) <- "was_ordered"
+                storage.mode(obj[[j]]) <- "double"
+            } else {
+                storage.mode(obj[[j]]) <- "integer"
+            }
+        } else {
+            storage.mode(obj[[j]]) <- "double"
+        }
+        nas <- is.na(x)
+        xt[[j]][nas, drop = FALSE] <- 0
+    }            
+
+    RET@transformations <- xt
+    RET@is_nominal <- is_nominal
+    RET@is_ordinal <- is_ordinal
+    RET@is_censored <- censored
+    RET@variables <- obj
+    RET@levels <- levels
+    RET@ordering <- ordering
+    RET@has_missings <- has_missings
+    RET@whichNA <- whichNA
+
+    if (response) {
+        RET@test_trafo <- as.matrix(as.data.frame(xt))
+        storage.mode(RET@test_trafo) <- "double"
+    }
+    RET
+}
+
+initVariableFrame.matrix <- function(obj, response = FALSE, ...) {
+
+    if (response)
+        return(initVariableFrame(as.data.frame(obj, ..., response = TRUE)))
+
+    storage.mode(obj) <- "double"
+    n <- nrow(obj)
+    p <- ncol(obj)
+    RET <- new("VariableFrame", n, p)
+    is_ordinal <- rep(FALSE, p)
+    is_nominal <- rep(FALSE, p)
+
+    RET@scores <- vector(mode = "list", length = p)
+
+    lobj <- vector(mode = "list", length = p)
+    for (i in 1:p) lobj[[i]] <- obj[,i,drop = FALSE]
+    obj <- lobj
+
+    ### ordering
+    ordering <- lapply(obj, function(x) {
+        as.integer(order(x))
+    })
+
+    ### div.
+    levels <- vector(mode = "list", length = p)
+    whichNA <- lapply(obj, function(x) which(is.na(x)))
+    has_missings <- sapply(obj, function(x) any(is.na(x)))
+    censored <- rep(FALSE, p)
+
+    RET@transformations <- obj
+    RET@is_nominal <- is_nominal
+    RET@is_ordinal <- is_ordinal
+    RET@is_censored <- censored
+    RET@variables <- RET@transformations
+    RET@levels <- levels
+    RET@ordering <- ordering
+    RET@has_missings <- has_missings
+    RET@whichNA <- whichNA
+
+    RET
+}
+
+setGeneric(name = "initVariableFrame",
+           def = function(obj, ...)
+               standardGeneric("initVariableFrame")
+)
+
+setMethod("initVariableFrame", 
+    signature = "data.frame",
+    definition = initVariableFrame.df
+)
+
+setMethod("initVariableFrame", 
+    signature = "matrix",
+    definition = initVariableFrame.matrix
+)
+
+setGeneric(name = "response",
+           def = function(object, ...)
+               standardGeneric("response")
+)
+
+setMethod("response",
+    signature = "BinaryTree",
+    definition = function(object) object@responses@variables
+)
+
+get_variables <- function(x)
+    x@variables
+
+setGeneric(name = "LearningSample",
+           def = function(object, ...)
+               standardGeneric("LearningSample")
+)
+
+LearningSample.matrix <- function(object, response, ...) {
+
+    new("LearningSample", inputs = inp <- initVariableFrame(object), 
+        responses = initVariableFrame(as.data.frame(response), response = TRUE, ...),
+        weights = rep(1, inp@nobs), nobs = inp@nobs,
+        ninputs = inp@ninputs)
+}
+
+setMethod("LearningSample",
+    signature = "matrix",
+    definition = LearningSample.matrix
+)
+
+LearningSample.ModelEnv <- function(object, ...) {
+
+    inp <- initVariableFrame(object@get("input"), ...)
+
+    response <- object@get("response")
+
+    if (any(is.na(response)))
+        stop("missing values in response variable not allowed")
+
+    resp <- initVariableFrame(response, ..., response = TRUE)
+
+    RET <- new("LearningSampleFormula", inputs = inp, responses = resp,
+               weights = rep(1, inp@nobs), nobs = inp@nobs,
+               ninputs = inp@ninputs, menv = object)
+    return(RET)
+}
+
+setMethod("LearningSample",
+    signature = "ModelEnv",
+    definition = LearningSample.ModelEnv
+)
+
+newinputs <- function(object, newdata = NULL) {
+
+    if (is.null(newdata)) return(object@inputs)
+    if (inherits(object, "LearningSampleFormula"))
+        newdata <- object@menv@get("input", data = newdata)
+
+    if (inherits(newdata, "VariableFrame"))
+        return(newdata)
+    if (inherits(newdata, "LearningSample"))
+        return(newdata@inputs)
+
+    return(initVariableFrame(newdata, trafo = ptrafo))
+}