a b/InformationTheory-PredictiveRankings.R
1
###################################################################
2
########## Predictive Rankings ####################################
3
###################################################################
4
5
6
###################################################################
7
########## Output Categorical - Covariates Categorical ############
8
###################################################################
9
#### First Order Rankings - INFO ####
10
#####################################
11
 INFO.Output_Categorical.Covariates_Categorical <- function(data,labels,treatment){
12
  
13
  num_features <- ncol(data)
14
  mi_scores <-  rep(0, num_features)
15
  ### Calculate the I(Y;T|X) for each Xs 
16
  for (index_feature in 1:num_features){ 
17
      mi_scores[index_feature] =  condinformation_normalised(treatment,labels,data[,index_feature])# condinformation(treatment,labels,data[,index_feature],method="shrink")  
18
    }
19
  
20
  #### Prepare the return functions
21
  sorted_scores <- sort(mi_scores, decreasing=T,method='shell',index.return=TRUE) 
22
  ranking_scores <- sort(sorted_scores$ix, decreasing=F,method='shell',index.return=TRUE)
23
  results <- list("scores" = mi_scores, "ranking" = sorted_scores$ix, "ranking_scores" = ranking_scores$ix)
24
  
25
  return(results)
26
  
27
}
28
#######################################
29
#### Second order rankings - INFO+ ####
30
#######################################
31
 INFOplus.Output_Categorical.Covariates_Categorical <- function(data,labels,treatment, top_k){
32
  num_features <- ncol(data)
33
  mi_scores <-  rep(0, num_features)
34
  ranking_scores <-     rep(0, num_features)
35
  ranking <-    rep(0, num_features)
36
  selected_features <- 0   
37
  ### First Step: Select the first covariate (this is equivalent of using the  INFO and take the first features) 
38
  VT.First <-  INFO.Output_Categorical.Covariates_Categorical(data,labels,treatment)
39
  
40
  selected_features[1]<-VT.First$ranking[1]
41
  ranking_scores[selected_features[1]] <- 1
42
  mi_scores[selected_features[1]] <- VT.First$scores[VT.First$ranking[1]]
43
  
44
  #### Second Step: Iteretavely rank the features, by estimating the second order criterion for each one of them, and take the features with the highest score
45
  not_selected_features <- setdiff(1:num_features,selected_features)
46
  score_per_feature <- array(0,dim=c(1,num_features))
47
  
48
  score_per_feature[selected_features[1]]<-NA
49
  count_cmi <- num_features
50
  for (count in 2:top_k){
51
    ### Check the score of each feature not selected so far
52
    
53
    for (index_feature_ns in 1:length(not_selected_features)){
54
      ## To calculate this score we should calculate the conditional mutual information with the features selected
55
      conditioning_features <- do.call(interaction,data[,c(not_selected_features[index_feature_ns], selected_features[count-1])])
56
      score_per_feature[not_selected_features[index_feature_ns]] <-  score_per_feature[not_selected_features[index_feature_ns]] + condinformation_normalised(treatment,labels,conditioning_features)# condinformation(treatment,labels,conditioning_features,method="shrink") 
57
      count_cmi <- count_cmi+1
58
      
59
    }
60
    
61
    selected_features[count] <- which.max(score_per_feature) ### It ignores the NA, for that reason I check all of the features (the already selected they have score NA)
62
    ranking_scores[selected_features[count]] <- count
63
    
64
    mi_scores[selected_features[count]] <-  score_per_feature[selected_features[count]]
65
    score_per_feature[selected_features[count]]<-NA
66
    not_selected_features <- setdiff(1:num_features,selected_features)
67
    
68
  }
69
  
70
  ranking_scores[ranking_scores==0] <- (top_k+1):num_features
71
  results <- list("scores" = mi_scores, "ranking" = selected_features, "ranking_scores" = ranking_scores,"count_cmi" = count_cmi)
72
  
73
  return(results)
74
}
75
76
###################################################################
77
########## Output Categorical - Covariates Continuous# ############
78
###################################################################
79
#### First Order Rankings - INFO ####
80
#####################################
81
 INFO.Output_Categorical.Covariates_Continuous <-  function(data,labels,treatment){
82
  num_features <- ncol(data)
83
  ### First step - Discretization
84
  for (index_feature in 1:num_features){ 
85
    ### Use Scott's rule to discretize
86
    data[,index_feature] = discretize( data[,index_feature], disc="equalwidth", nbins=nclass.scott(data[,index_feature]))
87
  }
88
  
89
  ### Second step - Derive ranking, by normalising with the conditional entropy
90
  mi_scores <-  rep(0, num_features)
91
  ### Calculate the I(Y;T|X) for each Xs 
92
  for (index_feature in 1:num_features){ 
93
    mi_scores[index_feature] =  condinformation_normalised(treatment,labels,data[,index_feature])  
94
  }
95
  
96
  #### Prepare the return functions
97
  sorted_scores <- sort(mi_scores, decreasing=T,method='shell',index.return=TRUE) 
98
  ranking_scores <- sort(sorted_scores$ix, decreasing=F,method='shell',index.return=TRUE)
99
  results <- list("scores" = mi_scores, "ranking" = sorted_scores$ix, "ranking_scores" = ranking_scores$ix)
100
  
101
  return(results)
102
}
103
104
#######################################
105
#### Second order rankings - INFO+ ####
106
#######################################
107
 INFOplus.Output_Categorical.Covariates_Continuous <-  function(data,labels,treatment, top_k){
108
109
  num_features <- ncol(data)
110
  mi_scores <-  rep(0, num_features)
111
  ranking_scores <-     rep(0, num_features)
112
  ranking <-    rep(0, num_features)
113
  selected_features <- 0   
114
  
115
  ### First Step: Select the first covariate (this is equivalent of using the  INFO and take the first features) 
116
  VT.First <-  INFO.Output_Categorical.Covariates_Continuous(data,labels,treatment)
117
  
118
  selected_features[1]<-VT.First$ranking[1]
119
  ranking_scores[selected_features[1]] <- 1
120
  mi_scores[selected_features[1]] <- VT.First$scores[VT.First$ranking[1]]
121
 
122
  ### Discretization
123
   for (index_feature in 1:num_features){ 
124
     ### Use Scott's rule to discretize
125
     data[,index_feature] = discretize( data[,index_feature], disc="equalwidth", nbins=nclass.scott(data[,index_feature]))
126
    }
127
  
128
129
  #### Second Step: Iteratively rank the features, by estimating the second order criterion for each one of them, and take the features with the highest score
130
  not_selected_features <- setdiff(1:num_features,selected_features)
131
  score_per_feature <- array(0,dim=c(1,num_features))
132
  
133
  score_per_feature[selected_features[1]]<-NA
134
  count_cmi <- num_features
135
  for (count in 2:top_k){
136
    ### Check the score of each feature not selected so far
137
    
138
    for (index_feature_ns in 1:length(not_selected_features)){
139
      ## To calculate this score we should calculate the conditional mutual information with the features selected
140
      conditioning_features <- do.call(interaction,data[,c(not_selected_features[index_feature_ns], selected_features[count-1])])
141
      score_per_feature[not_selected_features[index_feature_ns]] <-  score_per_feature[not_selected_features[index_feature_ns]] + condinformation_normalised(treatment,labels,conditioning_features) 
142
      count_cmi <- count_cmi+1
143
      
144
    }
145
    
146
    selected_features[count] <- which.max(score_per_feature) ### It ignores the NA, for that reason I check all of the features (the already selected they have score NA)
147
    ranking_scores[selected_features[count]] <- count
148
    
149
    mi_scores[selected_features[count]] <-  score_per_feature[selected_features[count]]
150
    score_per_feature[selected_features[count]]<-NA
151
    not_selected_features <- setdiff(1:num_features,selected_features)
152
    
153
  }
154
  
155
  ranking_scores[ranking_scores==0] <- (top_k+1):num_features
156
  results <- list("scores" = mi_scores, "ranking" = selected_features, "ranking_scores" = ranking_scores,"count_cmi" = count_cmi)
157
  
158
  return(results)
159
}
160
# The normalized conditional mutual information with respect to conditional entropy
161
condinformation_normalised <- function(treatment, labels, features)
162
{
163
  
164
  cmi_normalised <- condinformation(treatment,labels,features,method="shrink")   / sqrt(condentropy(treatment, features, method="shrink")*condentropy(labels, features, method="shrink"))
165
  return(cmi_normalised)
166
}
167
168
169
170
171
###################################################################
172
########## Output Survival - Covariates Categorical ###############
173
###################################################################
174
#### Estimate conditional mutual informaiton with survival outputs
175
condinformation_survival_normalised <- function(treatment, labels, features, times, censor_groups){
176
  sample_size <- length(labels)
177
  time_disc <- sort(unique(times))
178
  
179
  # Follow SIDES approach to use an extra parameter 
180
  times_steps <-  seq(0, length(time_disc), length.out = censor_groups + 1)
181
  cmi_normalised<-integer(length(times_steps)-1)
182
  
183
  for (index_steps in 2:(length(times_steps))){ 
184
    
185
    labels_step <- integer(sample_size)
186
    labels_step[which(times<=time_disc[times_steps[index_steps]])] <-  labels[which(times<=time_disc[times_steps[index_steps]])]
187
    
188
    
189
    cmi_normalised[index_steps] <-  condinformation(treatment,labels_step,features,method="shrink")/sqrt(condentropy(treatment, features, method="shrink")*condentropy(labels_step, features, method="shrink"))
190
    
191
  }
192
  
193
  return( mean(cmi_normalised,na.rm=TRUE))  
194
  
195
}
196
197
#####################################
198
#### First Order Rankings - INFO ####
199
#####################################
200
 INFO.Output_Survival.Covariates_Categorical <- function(data,labels,treatment,times,censor_groups){
201
  
202
  num_features <- ncol(data)
203
  mi_scores <-  rep(0, num_features)
204
  ### Calculate the I(Y;T|X) for each Xs 
205
  for (index_feature in 1:num_features){ 
206
    mi_scores[index_feature] =  condinformation_survival_normalised(treatment,labels,data[,index_feature], times, censor_groups)  
207
  }
208
  
209
  #### Prepare the return functions
210
  sorted_scores <- sort(mi_scores, decreasing=T,method='shell',index.return=TRUE) 
211
  ranking_scores <- sort(sorted_scores$ix, decreasing=F,method='shell',index.return=TRUE)
212
  results <- list("scores" = mi_scores, "ranking" = sorted_scores$ix, "ranking_scores" = ranking_scores$ix)
213
  
214
  return(results)
215
  
216
}
217
218
#######################################
219
#### Second order rankings - INFO+ ####
220
#######################################
221
 INFOplus.Output_Survival.Covariates_Categorical <- function(data,labels, treatment, times, censor_groups, top_k){
222
  num_features <- ncol(data)
223
  mi_scores <-  rep(0, num_features)
224
  ranking_scores <-     rep(0, num_features)
225
  ranking <-    rep(0, num_features)
226
  selected_features <- 0   
227
  ### First Step: Select the first covariate (this is equivalent of using the  INFO and take the first features) 
228
  VT.First <-  INFO.Output_Survival.Covariates_Categorical (data,labels,treatment,times, censor_groups)
229
  
230
  selected_features[1]<-VT.First$ranking[1]
231
  ranking_scores[selected_features[1]] <- 1
232
  mi_scores[selected_features[1]] <- VT.First$scores[VT.First$ranking[1]]
233
  
234
  #### Second Step: Iteratively rank the features, by estimating the second order criterion for each one of them, and take the features with the highest score
235
  not_selected_features <- setdiff(1:num_features,selected_features)
236
  score_per_feature <- array(0,dim=c(1,num_features))
237
  
238
  score_per_feature[selected_features[1]]<-NA
239
  count_cmi <- num_features
240
  for (count in 2:top_k){
241
    ### Check the score of each feature not selected so far
242
    
243
    for (index_feature_ns in 1:length(not_selected_features)){
244
      ## To calculate this score we should calculate the conditional mutual information with the features selected
245
      conditioning_features <- do.call(interaction,data[,c(not_selected_features[index_feature_ns], selected_features[count-1])])
246
      score_per_feature[not_selected_features[index_feature_ns]] <-  score_per_feature[not_selected_features[index_feature_ns]] + condinformation_survival_normalised(treatment,labels,conditioning_features,times, censor_groups) #/  condentropy(treatment + 2*labels, data[,not_selected_features[index_feature_ns]], method="shrink")
247
      count_cmi <- count_cmi+1
248
      
249
    }
250
    
251
    selected_features[count] <- which.max(score_per_feature) ### It ignores the NA, for that reason I check all of the features (the already selected they have score NA)
252
    ranking_scores[selected_features[count]] <- count
253
    
254
    mi_scores[selected_features[count]] <-  score_per_feature[selected_features[count]]
255
    score_per_feature[selected_features[count]]<-NA
256
    not_selected_features <- setdiff(1:num_features,selected_features)
257
    
258
  }
259
  
260
  ranking_scores[ranking_scores==0] <- (top_k+1):num_features
261
  results <- list("scores" = mi_scores, "ranking" = selected_features, "ranking_scores" = ranking_scores,"count_cmi" = count_cmi)
262
  
263
  return(results)
264
}
265
266
###################################################################
267
########## Output Survival - Covariates Continuous ################
268
###################################################################
269
#### First order rankings - INFO ####
270
#####################################
271
 INFO.Output_Survival.Covariates_Continuous <-  function(data, labels, treatment, times, censor_groups){
272
  ### First step - Discretization
273
  for (index_feature in 1:num_features){ 
274
    ### Use Scott's rule to discretize
275
    data[,index_feature] = discretize( data[,index_feature], disc="equalwidth", nbins=nclass.scott(data[,index_feature]))
276
  }
277
  
278
  ### Second step - Derive ranking, by normalizing with the conditional entropy
279
  num_features <- ncol(data)
280
  mi_scores <-  rep(0, num_features)
281
  ### Calculate the I(Y;T|X) for each Xs 
282
  for (index_feature in 1:num_features){ 
283
    mi_scores[index_feature] =    condinformation_survival_normalised(treatment,labels,data[,index_feature], times, censor_groups) 
284
  }
285
286
  #### Prepare the return functions
287
  sorted_scores <- sort(mi_scores, decreasing=T,method='shell',index.return=TRUE) 
288
  ranking_scores <- sort(sorted_scores$ix, decreasing=F,method='shell',index.return=TRUE)
289
  results <- list("scores" = mi_scores, "ranking" = sorted_scores$ix, "ranking_scores" = ranking_scores$ix)
290
  
291
  return(results)
292
}
293
294
#######################################
295
#### Second order rankings - INFO+ ####
296
#######################################
297
 INFOplus.Output_Survival.Covariates_Continuous <-  function(data, labels, treatment, times, censor_groups, top_k){
298
  ### First step - Discretization
299
  for (index_feature in 1:num_features){ 
300
    ### Use Scott's rule to discretize
301
    
302
    data[,index_feature] = discretize( data[,index_feature], disc="equalwidth", nbins=nclass.scott(data[,index_feature]))
303
  }
304
  
305
  ### Second step - Derive ranking, by normalising with the conditional entropy
306
  num_features <- ncol(data)
307
  mi_scores <-  rep(0, num_features)
308
  ranking_scores <-     rep(0, num_features)
309
  ranking <-    rep(0, num_features)
310
  selected_features <- 0   
311
  ### First Step: Select the first covariate (this is equivalent of using the  INFO and take the first features) 
312
  VT.First <-  INFO.Output_Survival.Covariates_Continuous(data,labels,treatment, times, censor_groups)
313
  
314
  selected_features[1]<-VT.First$ranking[1]
315
  ranking_scores[selected_features[1]] <- 1
316
  mi_scores[selected_features[1]] <- VT.First$scores[VT.First$ranking[1]]
317
  
318
  #### Second Step: Iteretavely rank the features, by estimating the second order criterion for each one of them, and take the features with the highest score
319
  not_selected_features <- setdiff(1:num_features,selected_features)
320
  score_per_feature <- array(0,dim=c(1,num_features))
321
  
322
  score_per_feature[selected_features[1]]<-NA
323
  count_cmi <- num_features
324
  for (count in 2:top_k){
325
    ### Check the score of each feature not selected so far
326
    
327
    for (index_feature_ns in 1:length(not_selected_features)){
328
      ## To calculate this score we should calculate the conditional mutual information with the features selected
329
      conditioning_features <- do.call(interaction,data[,c(not_selected_features[index_feature_ns], selected_features[count-1])])
330
      score_per_feature[not_selected_features[index_feature_ns]] <-  score_per_feature[not_selected_features[index_feature_ns]] + condinformation_survival_normalised(treatment, labels, conditioning_features, times, censor_groups) 
331
      count_cmi <- count_cmi+1
332
      
333
    }
334
    
335
    selected_features[count] <- which.max(score_per_feature) ### It ignores the NA, for that reason I check all of the features (the already selected they have score NA)
336
    ranking_scores[selected_features[count]] <- count
337
    
338
    mi_scores[selected_features[count]] <-  score_per_feature[selected_features[count]]
339
    score_per_feature[selected_features[count]]<-NA
340
    not_selected_features <- setdiff(1:num_features,selected_features)
341
    
342
  }
343
  
344
  ranking_scores[ranking_scores==0] <- (top_k+1):num_features
345
  results <- list("scores" = mi_scores, "ranking" = selected_features, "ranking_scores" = ranking_scores,"count_cmi" = count_cmi)
346
  
347
  return(results)
348
}
349
350
351
352