Switch to unified view

a b/partyMod/src/Distributions.c
1
2
/**
3
    Conditional Distributions
4
    *\file Distributions.c
5
    *\author $Author$
6
    *\date $Date$
7
*/
8
                
9
#include "party.h"
10
11
12
/**
13
    Conditional asymptotic P-value of a quadratic form\n
14
    *\param tstat test statistic
15
    *\param df degree of freedom
16
*/
17
        
18
double C_quadformConditionalPvalue(const double tstat, const double df) {
19
    return(pchisq(tstat, df, 0, 0));
20
}
21
22
23
/**
24
    R-interface to C_quadformConditionalPvalue\n
25
    *\param tstat test statitstic
26
    *\param df degree of freedom
27
*/
28
29
SEXP R_quadformConditionalPvalue(SEXP tstat, SEXP df) {
30
31
    SEXP ans;
32
    
33
    PROTECT(ans = allocVector(REALSXP, 1));
34
    REAL(ans)[0] = C_quadformConditionalPvalue(REAL(tstat)[0], REAL(df)[0]);
35
    UNPROTECT(1);
36
    return(ans);
37
}
38
39
40
/**
41
    Conditional asymptotic P-value of a maxabs-type test statistic\n
42
    Basically the functionality from package `mvtnorm' \n
43
    *\param tstat test statitstic
44
    *\param Sigma covariance matrix
45
    *\param pq nrow(Sigma)
46
    *\param maxpts number of Monte-Carlo steps
47
    *\param releps relative error
48
    *\param abseps absolute error
49
    *\param tol tolerance
50
*/
51
52
double C_maxabsConditionalPvalue(const double tstat, const double *Sigma, 
53
    const int pq, int *maxpts, double *releps, double *abseps, double *tol) {
54
55
    int *n, *nu, *inform, i, j, *infin, sub, *index, nonzero, iz, jz;
56
    double *lower, *upper, *delta, *corr, *sd, *myerror,
57
           *prob, ans;
58
59
    /* univariate problem */
60
    if (pq == 1) 
61
        return(2*pnorm(fabs(tstat)*-1.0, 0.0, 1.0, 1, 0)); /* return P-value */
62
    
63
    n = Calloc(1, int);
64
    nu = Calloc(1, int);
65
    myerror = Calloc(1, double);
66
    prob = Calloc(1, double);
67
    nu[0] = 0;
68
    inform = Calloc(1, int);
69
    n[0] = pq;
70
        
71
    if (n[0] == 2)  
72
         corr = Calloc(1, double);
73
    else 
74
         corr = Calloc(n[0] + ((n[0] - 2) * (n[0] - 1))/2, double);
75
    
76
    sd = Calloc(n[0], double);
77
    lower = Calloc(n[0], double);
78
    upper = Calloc(n[0], double);
79
    infin = Calloc(n[0], int);
80
    delta = Calloc(n[0], double);
81
    index = Calloc(n[0], int);
82
83
    /* determine elements with non-zero variance */ 
84
85
    nonzero = 0;
86
    for (i = 0; i < n[0]; i++) {
87
        if (Sigma[i*n[0] + i] > tol[0]) {
88
            index[nonzero] = i;
89
            nonzero++;
90
        }
91
    }
92
93
    /* mvtdst assumes the unique elements of the triangular 
94
       covariance matrix to be passes as argument CORREL 
95
    */
96
97
    for (iz = 0; iz < nonzero; iz++) {
98
99
        /* handle elements with non-zero variance only */
100
        i = index[iz];
101
102
        /* standard deviations */
103
        sd[i] = sqrt(Sigma[i*n[0] + i]);
104
                
105
        /* always look at the two-sided problem */           
106
        lower[iz] = fabs(tstat) * -1.0;
107
        upper[iz] = fabs(tstat);
108
        infin[iz] = 2;
109
        delta[iz] = 0.0;
110
        
111
        /* set up vector of correlations, i.e., the upper 
112
           triangular part of the covariance matrix) */
113
        for (jz = 0; jz < iz; jz++) {
114
            j = index[jz]; 
115
            sub = (int) (jz + 1) + (double) ((iz - 1) * iz) / 2 - 1;
116
            if (sd[i] == 0.0 || sd[j] == 0.0) 
117
                corr[sub] = 0.0; 
118
            else 
119
                corr[sub] = Sigma[i*n[0] + j] / (sd[i] * sd[j]);
120
        }
121
    }
122
    n[0] = nonzero;
123
        
124
    /* call FORTRAN subroutine */
125
    F77_CALL(mvtdst)(n, nu, lower, upper, infin, corr, delta, 
126
                     maxpts, abseps, releps, myerror, prob, inform);
127
                         
128
    /* inform == 0 means: everything is OK */
129
    switch (inform[0]) {
130
        case 0: break;
131
        case 1: warning("cmvnorm: completion with ERROR > EPS"); break;
132
        case 2: warning("cmvnorm: N > 1000 or N < 1"); 
133
                prob[0] = 0.0; 
134
                break;
135
        case 3: warning("cmvnorm: correlation matrix not positive semi-definite"); 
136
                prob[0] = 0.0; 
137
                break;
138
        default: warning("cmvnorm: unknown problem in MVTDST");
139
                 prob[0] = 0.0;
140
    }
141
    ans = prob[0];
142
    Free(corr); Free(sd); Free(lower); Free(upper); 
143
    Free(infin); Free(delta); Free(myerror); Free(prob);
144
    Free(n); Free(nu); Free(inform); 
145
    return(1 - ans);  /* return P-value */
146
}
147
148
149
/**
150
   R-interface to C_maxabsConditionalPvalue \n
151
   *\param tstat test statitstic
152
   *\param Sigma covariance matrix
153
   *\param maxpts number of Monte-Carlo steps
154
   *\param releps relative error
155
   *\param abseps absolute error
156
   *\param tol tolerance
157
*/
158
159
SEXP R_maxabsConditionalPvalue(SEXP tstat, SEXP Sigma, SEXP maxpts, 
160
                               SEXP releps, SEXP abseps, SEXP tol) {
161
           
162
    SEXP ans;                    
163
    int pq;
164
    
165
    pq = nrow(Sigma);
166
   
167
    PROTECT(ans = allocVector(REALSXP, 1));
168
    REAL(ans)[0] = C_maxabsConditionalPvalue(REAL(tstat)[0], REAL(Sigma), pq, 
169
        INTEGER(maxpts), REAL(releps), REAL(abseps), REAL(tol));
170
    UNPROTECT(1);
171
    return(ans);
172
}
173
174
175
/**
176
    Monte-Carlo approximation to the conditional pvalues 
177
    *\param criterion vector of node criteria for each input 
178
    *\param learnsample an object of class `LearningSample'
179
    *\param weights case weights
180
    *\param fitmem an object of class `TreeFitMemory'
181
    *\param varctrl an object of class `VariableControl'
182
    *\param gtctrl an object of class `GlobalTestControl'
183
    *\param ans_pvalues return values; vector of adjusted pvalues 
184
*/
185
186
void C_MonteCarlo(double *criterion, SEXP learnsample, SEXP weights, 
187
                  SEXP fitmem, SEXP varctrl, SEXP gtctrl, double *ans_pvalues) {
188
189
    int ninputs, nobs, j, i, k;
190
    SEXP responses, inputs, y, x, xmem, expcovinf;
191
    double sweights, *stats, tmp = 0.0, smax, *dweights; 
192
    int m, *counts, b, B, *dummy, *permindex, *index, *permute;
193
    
194
    ninputs = get_ninputs(learnsample);
195
    nobs = get_nobs(learnsample);
196
    responses = GET_SLOT(learnsample, PL2_responsesSym);
197
    inputs = GET_SLOT(learnsample, PL2_inputsSym);
198
    dweights = REAL(weights);
199
    
200
    /* number of Monte-Carlo replications */
201
    B = get_nresample(gtctrl);
202
    
203
    /* y = get_transformation(responses, 1); */
204
    y = get_test_trafo(responses);
205
    
206
    expcovinf = GET_SLOT(fitmem, PL2_expcovinfSym);
207
208
    sweights = REAL(GET_SLOT(expcovinf, PL2_sumweightsSym))[0];
209
    m = (int) sweights;
210
    
211
    stats = Calloc(ninputs, double);
212
    counts = Calloc(ninputs, int);
213
    
214
    dummy = Calloc(m, int);
215
    permute = Calloc(m, int);
216
    index = Calloc(m, int);
217
    permindex = Calloc(m, int);
218
                
219
    /* expand weights, see appendix of 
220
       `Unbiased Recursive Partitioning: A Conditional Inference Framework' */
221
    j = 0;
222
    for (i = 0; i < nobs; i++) {
223
        for (k = 0; k < dweights[i]; k++) {
224
            index[j] = i;
225
            j++;
226
        }
227
    }
228
229
    for (b = 0; b < B; b++) {
230
231
        /* generate a admissible permutation */
232
        C_SampleNoReplace(dummy, m, m, permute);
233
        for (k = 0; k < m; k++) permindex[k] = index[permute[k]];
234
235
        /* for all input variables */
236
        for (j = 1; j <= ninputs; j++) {
237
            x = get_transformation(inputs, j);
238
239
            /* compute test statistic or pvalue for the permuted data */
240
            xmem = get_varmemory(fitmem, j);
241
            if (!has_missings(inputs, j)) {
242
                C_PermutedLinearStatistic(REAL(x), ncol(x), REAL(y), ncol(y), 
243
                    nobs, m, index, permindex, 
244
                    REAL(GET_SLOT(xmem, PL2_linearstatisticSym)));
245
            } else {
246
                error("cannot resample with missing values");
247
            }
248
            
249
            /* compute the criterion, i.e. something to be MAXIMISED */
250
            C_TeststatCriterion(xmem, varctrl, &tmp, &stats[j - 1]);
251
        }
252
        
253
        /* the maximum of the permuted test statistics / 1 - pvalues */
254
        smax = C_max(stats, ninputs);
255
256
        /* count the number of permuted > observed */
257
        for (j = 0; j < ninputs; j++) {
258
            if (smax > criterion[j]) counts[j]++;
259
        }
260
    }
261
    
262
    /* return adjusted pvalues */
263
    for (j = 0; j < ninputs; j++)
264
        ans_pvalues[j] = (double) counts[j] / B;
265
                
266
    /* <FIXME> we try to assess the linear statistics later on 
267
               (in C_Node, for categorical variables) 
268
               but have used this memory for resampling here */
269
270
    for (j = 1; j <= ninputs; j++) {
271
        x = get_transformation(inputs, j);
272
        /* re-compute linear statistics for unpermuted data */
273
        xmem = get_varmemory(fitmem, j);
274
        C_LinearStatistic(REAL(x), ncol(x), REAL(y), ncol(y), 
275
                      dweights, nobs, 
276
                      REAL(GET_SLOT(xmem, PL2_linearstatisticSym)));
277
    }
278
    /* </FIXME> */
279
    
280
    Free(stats); Free(counts); Free(dummy); Free(permute); 
281
    Free(index); Free(permindex);
282
}
283
284
285
/**
286
    R-interface to C_MonteCarlo \n
287
    *\param criterion vector of node criteria for each input
288
    *\param learnsample an object of class `LearningSample'
289
    *\param weights case weights
290
    *\param fitmem an object of class `TreeFitMemory'
291
    *\param varctrl an object of class `VariableControl'
292
    *\param gtctrl an object of class `GlobalTestControl'
293
*/
294
295
SEXP R_MonteCarlo(SEXP criterion, SEXP learnsample, SEXP weights, 
296
                  SEXP fitmem, SEXP varctrl, SEXP gtctrl) {
297
                  
298
     SEXP ans;
299
     
300
     GetRNGstate();
301
     
302
     PROTECT(ans = allocVector(REALSXP, get_ninputs(learnsample)));
303
     C_MonteCarlo(REAL(criterion), learnsample, weights, fitmem, varctrl, 
304
                  gtctrl, REAL(ans));
305
                  
306
     PutRNGstate();
307
                  
308
     UNPROTECT(1);
309
     return(ans);
310
}