a b/common_scripts/statistics/useful_functions.R
1
aggregate_by_mean <- function(data, xs) {
2
  if(dim(data)[2]!=length(xs))stop("length between data and variable 'xs' do not match!")
3
  library(reshape)
4
  library(data.table)
5
  # Convert to data.table.
6
  dat <- data.table(t(data))
7
  # Append the vector of group names as an extra column.
8
  dat$agg_var <- as.character(xs)
9
  # Melt the data.table so all values are in one column called "value".
10
  dat <- melt(dat, id.vars = "agg_var")
11
  # Cast the data.table back into the original shape, and take the mean.
12
  dat <- dcast.data.table(
13
    dat, agg_var ~ variable, value.var = "value",
14
    fun.aggregate = mean, na.rm = TRUE
15
  )
16
  
17
  namecol=dat$agg_var
18
  
19
  # Delete the extra column.
20
  dat[ , agg_var := NULL]
21
  
22
  dat=t(dat)
23
  
24
  rownames(dat) <- rownames(data)
25
  colnames(dat)=namecol
26
  return(dat)
27
}
28
29
aggregate_by_median <- function(data, xs) {
30
  if(dim(data)[2]!=length(xs))stop("length between data and variable 'xs' do not match!")
31
  library(reshape)
32
  library(data.table)
33
  # Convert to data.table.
34
  dat <- data.table(t(data))
35
  # Append the vector of group names as an extra column.
36
  dat$agg_var <- as.character(xs)
37
  # Melt the data.table so all values are in one column called "value".
38
  dat <- melt(dat, id.vars = "agg_var")
39
  # Cast the data.table back into the original shape, and take the mean.
40
  dat <- dcast.data.table(
41
    dat, agg_var ~ variable, value.var = "value",
42
    fun.aggregate = median, na.rm = TRUE
43
  )
44
  
45
  namecol=dat$agg_var
46
  
47
  # Delete the extra column.
48
  dat[ , agg_var := NULL]
49
  
50
  dat=t(dat)
51
  
52
  rownames(dat) <- rownames(data)
53
  colnames(dat)=namecol
54
  return(dat)
55
}