Diff of /partyMod/R/Variables.R [000000] .. [fbf06f]

Switch to unified view

a b/partyMod/R/Variables.R
1
2
# $Id$
3
4
### factor handling
5
ff_trafo <- function(x) {
6
    ### temporarily define `na.pass' as na.action
7
    opt <- options()
8
    on.exit(options(opt))
9
    options(na.action = na.pass)
10
    if (nlevels(x) == 1) {
11
        warning("factors at only one level may lead to problems")
12
        mm <- matrix(1, nrow = length(x))
13
    } else {
14
        ### construct design matrix _without_ intercept
15
        mm <- model.matrix(~ x - 1)
16
    }
17
    colnames(mm) <- levels(x)  
18
    return(mm)
19
}
20
21
ptrafo <- function(data, numeric_trafo = id_trafo, factor_trafo = ff_trafo, 
22
    ordered_trafo = of_trafo, surv_trafo = logrank_trafo, var_trafo = NULL)
23
24
    trafo(data = data, numeric_trafo = numeric_trafo, factor_trafo =
25
          factor_trafo, ordered_trafo = ordered_trafo, 
26
          surv_trafo = surv_trafo, var_trafo = var_trafo)
27
28
29
initVariableFrame.df <- function(obj, trafo = ptrafo, scores = NULL, response = FALSE, ...) {
30
31
    if (is.null(trafo)) trafo <- ptrafo
32
    if (response) {
33
        RET <- new("ResponseFrame", nrow(obj), ncol(obj))
34
        tmp <- lapply(obj, function(x) {
35
            if (is.factor(x)) return(ff_trafo(x))
36
            ### FIXME
37
            if (inherits(x, "Surv")) return(logrank_trafo(x))
38
            return(x)
39
        })
40
        RET@predict_trafo <- as.matrix(as.data.frame(tmp))
41
        storage.mode(RET@predict_trafo) <- "double"
42
    } else {
43
        RET <- new("VariableFrame", nrow(obj), ncol(obj))
44
    }
45
    
46
    is_ordinal <- sapply(obj, is.ordered)
47
    is_nominal <- sapply(obj, is.factor) & !is_ordinal
48
49
    ### assign user-specified scores to variables in `obj'
50
    if (!is.null(scores)) {
51
        if (!is.list(scores) || is.null(names(scores)))
52
            stop(sQuote("scores"), " is not a named list")
53
        scores <- scores[names(scores) %in% colnames(obj)]
54
    }
55
    if (!is.null(scores)) {
56
        for (n in names(scores)) {
57
            if (!(is.factor(obj[[n]]) && is.ordered(obj[[n]])) || 
58
                nlevels(obj[[n]]) != length(scores[[n]]))
59
                stop("cannot assign scores to variable ", sQuote(n))
60
            if (any(order(scores[[n]]) != 1:length(scores[[n]])))
61
                stop("scores are not increasingly ordered")
62
            attr(obj[[n]], "scores") <- scores[[n]]
63
        }
64
    }
65
66
    RET@scores <- lapply(obj, function(x) {
67
        sc <- 0
68
        if (is.ordered(x)) {
69
            sc <- attr(x, "scores")
70
            if (is.null(sc)) sc <- 1:nlevels(x)
71
            storage.mode(sc) <- "double"
72
        }
73
        sc
74
    })
75
76
    ### transformations
77
    jt <- trafo(obj)
78
79
    ### for each variable
80
    xt <- vector(mode = "list", length = ncol(obj))
81
    for (i in 1:ncol(obj))
82
        xt[[i]] <- jt[,attr(jt, "assign") == i, drop = FALSE]
83
    rm(jt)
84
85
    ### ordering
86
    ordering <- lapply(obj, function(x) {
87
        if (is.factor(x) && !is.ordered(x)) return(NULL)
88
        if (inherits(x, "Surv")) return(NULL)
89
        if (is.ordered(x)) return(as.integer(order(as.numeric(x))))
90
        as.integer(order(x))
91
    })
92
93
    ### div.
94
    levels <- lapply(obj, function(x) if(is.factor(x)) levels(x))
95
    whichNA <- lapply(obj, function(x) which(is.na(x)))
96
    has_missings <- sapply(obj, function(x) any(is.na(x)))
97
    censored <- sapply(obj, function(x) inherits(x, "Surv"))
98
99
    ### some "handwork" 
100
    for (j in 1:ncol(obj)) {
101
        x <- obj[[j]]
102
103
        if (censored[j]) 
104
            ordering[[j]] <- as.integer(order(xt[[j]]))
105
106
        if (is.factor(x)) {
107
            if (is_ordinal[j]) {
108
                storage.mode(xt[[j]]) <- "double"
109
                ### R 2.5.0 does not allow to change the storage mode of factors
110
                class(obj[[j]]) <- "was_ordered"
111
                storage.mode(obj[[j]]) <- "double"
112
            } else {
113
                storage.mode(obj[[j]]) <- "integer"
114
            }
115
        } else {
116
            storage.mode(obj[[j]]) <- "double"
117
        }
118
        nas <- is.na(x)
119
        xt[[j]][nas, drop = FALSE] <- 0
120
    }            
121
122
    RET@transformations <- xt
123
    RET@is_nominal <- is_nominal
124
    RET@is_ordinal <- is_ordinal
125
    RET@is_censored <- censored
126
    RET@variables <- obj
127
    RET@levels <- levels
128
    RET@ordering <- ordering
129
    RET@has_missings <- has_missings
130
    RET@whichNA <- whichNA
131
132
    if (response) {
133
        RET@test_trafo <- as.matrix(as.data.frame(xt))
134
        storage.mode(RET@test_trafo) <- "double"
135
    }
136
    RET
137
}
138
139
initVariableFrame.matrix <- function(obj, response = FALSE, ...) {
140
141
    if (response)
142
        return(initVariableFrame(as.data.frame(obj, ..., response = TRUE)))
143
144
    storage.mode(obj) <- "double"
145
    n <- nrow(obj)
146
    p <- ncol(obj)
147
    RET <- new("VariableFrame", n, p)
148
    is_ordinal <- rep(FALSE, p)
149
    is_nominal <- rep(FALSE, p)
150
151
    RET@scores <- vector(mode = "list", length = p)
152
153
    lobj <- vector(mode = "list", length = p)
154
    for (i in 1:p) lobj[[i]] <- obj[,i,drop = FALSE]
155
    obj <- lobj
156
157
    ### ordering
158
    ordering <- lapply(obj, function(x) {
159
        as.integer(order(x))
160
    })
161
162
    ### div.
163
    levels <- vector(mode = "list", length = p)
164
    whichNA <- lapply(obj, function(x) which(is.na(x)))
165
    has_missings <- sapply(obj, function(x) any(is.na(x)))
166
    censored <- rep(FALSE, p)
167
168
    RET@transformations <- obj
169
    RET@is_nominal <- is_nominal
170
    RET@is_ordinal <- is_ordinal
171
    RET@is_censored <- censored
172
    RET@variables <- RET@transformations
173
    RET@levels <- levels
174
    RET@ordering <- ordering
175
    RET@has_missings <- has_missings
176
    RET@whichNA <- whichNA
177
178
    RET
179
}
180
181
setGeneric(name = "initVariableFrame",
182
           def = function(obj, ...)
183
               standardGeneric("initVariableFrame")
184
)
185
186
setMethod("initVariableFrame", 
187
    signature = "data.frame",
188
    definition = initVariableFrame.df
189
)
190
191
setMethod("initVariableFrame", 
192
    signature = "matrix",
193
    definition = initVariableFrame.matrix
194
)
195
196
setGeneric(name = "response",
197
           def = function(object, ...)
198
               standardGeneric("response")
199
)
200
201
setMethod("response",
202
    signature = "BinaryTree",
203
    definition = function(object) object@responses@variables
204
)
205
206
get_variables <- function(x)
207
    x@variables
208
209
setGeneric(name = "LearningSample",
210
           def = function(object, ...)
211
               standardGeneric("LearningSample")
212
)
213
214
LearningSample.matrix <- function(object, response, ...) {
215
216
    new("LearningSample", inputs = inp <- initVariableFrame(object), 
217
        responses = initVariableFrame(as.data.frame(response), response = TRUE, ...),
218
        weights = rep(1, inp@nobs), nobs = inp@nobs,
219
        ninputs = inp@ninputs)
220
}
221
222
setMethod("LearningSample",
223
    signature = "matrix",
224
    definition = LearningSample.matrix
225
)
226
227
LearningSample.ModelEnv <- function(object, ...) {
228
229
    inp <- initVariableFrame(object@get("input"), ...)
230
231
    response <- object@get("response")
232
233
    if (any(is.na(response)))
234
        stop("missing values in response variable not allowed")
235
236
    resp <- initVariableFrame(response, ..., response = TRUE)
237
238
    RET <- new("LearningSampleFormula", inputs = inp, responses = resp,
239
               weights = rep(1, inp@nobs), nobs = inp@nobs,
240
               ninputs = inp@ninputs, menv = object)
241
    return(RET)
242
}
243
244
setMethod("LearningSample",
245
    signature = "ModelEnv",
246
    definition = LearningSample.ModelEnv
247
)
248
249
newinputs <- function(object, newdata = NULL) {
250
251
    if (is.null(newdata)) return(object@inputs)
252
    if (inherits(object, "LearningSampleFormula"))
253
        newdata <- object@menv@get("input", data = newdata)
254
255
    if (inherits(newdata, "VariableFrame"))
256
        return(newdata)
257
    if (inherits(newdata, "LearningSample"))
258
        return(newdata@inputs)
259
260
    return(initVariableFrame(newdata, trafo = ptrafo))
261
}