Switch to unified view

a b/research_paper_code/src/misc.R
1
# SUMMARY
2
# -------
3
# This file contains some function definitions that don't fit in any
4
# other file. Here is an overview of the functions defined in this
5
# file:
6
#
7
#    caterase(s)
8
#    logit10(x)
9
#    project.onto.interval(x,a,b)
10
#    center.columns(X)
11
#    is.binary.factor(x)
12
#    is.binary(x)
13
#    binfactor2num(x)
14
#    binary.from.categorical(x,col.names)
15
#    none.missing.row(x)
16
#    compute.maf(geno)
17
#    check.normal.quantiles(x)
18
#
19
# FUNCTION DEFINITIONS
20
# ----------------------------------------------------------------------
21
# Output the string using 'cat', then move the cursor back to the
22
# beginning of the string so that subsequent output will overwrite
23
# this string.
24
caterase <- function (s)
25
  cat(s,rep("\b",nchar(s)),sep="")
26
27
# ----------------------------------------------------------------------
28
# Return the logit of x. (Here we use the base-10 logarithm instead of the 
29
# more commonly used natural logarithm.)
30
logit10 <- function (x) {
31
  eps <- .Machine$double.eps
32
  return(log10((x + eps)/(1 - x + eps)))
33
}
34
35
# ----------------------------------------------------------------------
36
# Return x projected onto interval [a,b].
37
project.onto.interval <- function (x, a, b)
38
  pmin(b,pmax(a,x))
39
40
# ----------------------------------------------------------------------
41
# Center the columns of matrix X so that the entries in each column
42
# of X add up to 0.
43
# 
44
center.columns <- function (X) {
45
  y <- colMeans(X)
46
  X <- X - matrix(y,nrow(X),ncol(X),byrow = TRUE)
47
  return(X)
48
}
49
50
# ----------------------------------------------------------------------
51
# Return TRUE if x is a factor with exactly 2 levels.
52
is.binary.factor <- function (x)
53
  is.factor(x) & nlevels(x) == 2
54
    
55
# ----------------------------------------------------------------------
56
# Return TRUE if all elements of a numeric vector are either 0 or 1,
57
# ignoring missing values (NA).
58
is.binary <- function (x)
59
  is.numeric(x) & sum(!(x == 0 | x == 1),na.rm = TRUE) == 0
60
61
# ----------------------------------------------------------------------
62
# Convert a factor with exactly 2 levels to a numeric vector with
63
# values 0 or 1.
64
binfactor2num <- function (x) {
65
  if (!is.binary.factor(x))
66
    stop("Factor must have exactly 2 levels")
67
  return(as.numeric(x) - 1)
68
}
69
  
70
# ----------------------------------------------------------------------
71
# Return a data frame with one column for each level of factor x. The
72
# columns of the data frame encode the categorical variable with n
73
# levels as n binary variables.
74
binary.from.categorical <- function (x, col.names = NULL) {
75
76
  # Create a binary factor for each value of the categorical variable.
77
  d <- list()
78
  for (value in levels(x))
79
    d[[value]] <- factor(as.integer(x == value))
80
81
  # Convert the list to a data frame, and adjust the column names, if
82
  # requested.
83
  d <- data.frame(d,check.names = FALSE)
84
  if (!is.null(col.names))
85
    names(d) <- col.names
86
87
  # Output the newly created data frame.
88
  return(d)
89
}
90
91
# ----------------------------------------------------------------------
92
# For each row of the matrix or data frame, return TRUE if all the
93
# entries in the row are provided (that is, they are not missing).
94
none.missing.row <- function (x)
95
  rowSums(is.na(x)) == 0
96
97
# ----------------------------------------------------------------------
98
# Returns the minor allele frequency given a vector of genotypes
99
# encoded as allele counts.
100
compute.maf <- function (geno) {
101
  f <- mean(geno,na.rm = TRUE)/2
102
  return(min(f,1-f))
103
}
104
105
# ----------------------------------------------------------------------
106
# Compare the empirical quantiles of x against the expected valuesx
107
# under the normal distribution.
108
check.normal.quantiles <- function (x) {
109
110
  # Discard the missing values.
111
  x <- x[!is.na(x)]
112
    
113
  # Transform the observations so that they have zero mean and unit
114
  # variance.
115
  x <- (x - mean(x))/sqrt(var(x))
116
  
117
  # Create a data frame giving the observed and expected proportion of
118
  # samples within 1, 2 and 3 standard deviations of the mean.
119
  return(data.frame(exp = c(pnorm(1) - pnorm(-1),
120
                            pnorm(2) - pnorm(-2),
121
                            pnorm(3) - pnorm(-3)),
122
                    obs = c(mean(-1 < x & x < 1),
123
                            mean(-2 < x & x < 2),
124
                            mean(-3 < x & x < 3)),
125
                    row.names = c("sd1","sd2","sd3")))
126
}