|
a |
|
b/common_scripts/statistics/useful_functions.R |
|
|
1 |
aggregate_by_mean <- function(data, xs) { |
|
|
2 |
if(dim(data)[2]!=length(xs))stop("length between data and variable 'xs' do not match!") |
|
|
3 |
library(reshape) |
|
|
4 |
library(data.table) |
|
|
5 |
# Convert to data.table. |
|
|
6 |
dat <- data.table(t(data)) |
|
|
7 |
# Append the vector of group names as an extra column. |
|
|
8 |
dat$agg_var <- as.character(xs) |
|
|
9 |
# Melt the data.table so all values are in one column called "value". |
|
|
10 |
dat <- melt(dat, id.vars = "agg_var") |
|
|
11 |
# Cast the data.table back into the original shape, and take the mean. |
|
|
12 |
dat <- dcast.data.table( |
|
|
13 |
dat, agg_var ~ variable, value.var = "value", |
|
|
14 |
fun.aggregate = mean, na.rm = TRUE |
|
|
15 |
) |
|
|
16 |
|
|
|
17 |
namecol=dat$agg_var |
|
|
18 |
|
|
|
19 |
# Delete the extra column. |
|
|
20 |
dat[ , agg_var := NULL] |
|
|
21 |
|
|
|
22 |
dat=t(dat) |
|
|
23 |
|
|
|
24 |
rownames(dat) <- rownames(data) |
|
|
25 |
colnames(dat)=namecol |
|
|
26 |
return(dat) |
|
|
27 |
} |
|
|
28 |
|
|
|
29 |
aggregate_by_median <- function(data, xs) { |
|
|
30 |
if(dim(data)[2]!=length(xs))stop("length between data and variable 'xs' do not match!") |
|
|
31 |
library(reshape) |
|
|
32 |
library(data.table) |
|
|
33 |
# Convert to data.table. |
|
|
34 |
dat <- data.table(t(data)) |
|
|
35 |
# Append the vector of group names as an extra column. |
|
|
36 |
dat$agg_var <- as.character(xs) |
|
|
37 |
# Melt the data.table so all values are in one column called "value". |
|
|
38 |
dat <- melt(dat, id.vars = "agg_var") |
|
|
39 |
# Cast the data.table back into the original shape, and take the mean. |
|
|
40 |
dat <- dcast.data.table( |
|
|
41 |
dat, agg_var ~ variable, value.var = "value", |
|
|
42 |
fun.aggregate = median, na.rm = TRUE |
|
|
43 |
) |
|
|
44 |
|
|
|
45 |
namecol=dat$agg_var |
|
|
46 |
|
|
|
47 |
# Delete the extra column. |
|
|
48 |
dat[ , agg_var := NULL] |
|
|
49 |
|
|
|
50 |
dat=t(dat) |
|
|
51 |
|
|
|
52 |
rownames(dat) <- rownames(data) |
|
|
53 |
colnames(dat)=namecol |
|
|
54 |
return(dat) |
|
|
55 |
} |