a b/research_paper_code/src/gemma.R
1
# SUMMARY
2
# -------
3
# This file defines some functions that I use for QTL mapping in
4
# GEMMA. Here is an overview of the functions defined in this file:
5
#
6
#   write.gemma.pheno(file,phenotype,pheno)
7
#   write.gemma.covariates(file,covariates,pheno) 
8
#   write.gemma.map(file,map)
9
#   write.gemma.geno(file,geno,map)
10
#   read.gemma.assoc(file)
11
#   run.gemma(phenotype,covariates,pheno,geno,map,gemmadir,gemma.exe,
12
#             chromosomes)
13
#   run.gemma.norr(phenotype,covariates,pheno,geno,map,gemmadir,gemma.exe) 
14
#
15
# FUNCTION DEFINITIONS
16
# ----------------------------------------------------------------------
17
# Write the phenotype data to a file in the format used by GEMMA. Each
18
# line of the file contains one phenotype observation.
19
write.gemma.pheno <- function (file, phenotype, pheno) {
20
  y <- pheno[phenotype]
21
  if (is.numeric(y))
22
    y <- round(y,digits = 6)
23
  write.table(y,file,quote = FALSE,row.names = FALSE,col.names = FALSE)
24
}
25
26
# ----------------------------------------------------------------------
27
# Write the covariate data to a file in the format used by GEMMA. Each
28
# line corresponds to a sample. We must include an additional
29
# covariate for the intercept.
30
write.gemma.covariates <- function (file, covariates, pheno) {
31
  if (is.null(covariates)) {
32
    write.table(data.frame(rep(1,nrow(pheno))),file,sep = " ",
33
                quote = FALSE,row.names = FALSE,col.names = FALSE)
34
  } else {
35
    write.table(cbind(1,data.frame(lapply(pheno[covariates],function (x) {
36
      if (is.numeric(x))
37
        round(x,digits=6)
38
      else
39
        x
40
    }))),file,sep = " ",quote = FALSE,row.names = FALSE,col.names = FALSE)
41
  }
42
}
43
44
# ----------------------------------------------------------------------
45
# Write the SNP information to a space-delimited text file in the
46
# format used by GEMMA. This file contains one line per SNP, with
47
# three columns: (1) SNP label, (2) base-pair position, (3)
48
# chromosome.
49
write.gemma.map <- function (file, map)
50
  write.table(map[c("id","pos","chr")],file,sep = " ",quote = FALSE,
51
              row.names = FALSE,col.names = FALSE)
52
53
# ----------------------------------------------------------------------
54
# Store the mean genotypes as a space-delimited text file in the
55
# format used by GEMMA, in which we have one row per SNP, and one
56
# column per sample. The first three column give the SNP label, and
57
# the two alleles.
58
write.gemma.geno <- function (file, geno, map) {
59
  geno <- t(geno)
60
  geno <- as.data.frame(geno,check.names = FALSE)
61
  geno <- round(geno,digits = 3)
62
  geno <- cbind(map[c("id","ref","alt")],geno)
63
  write.table(geno,file,sep = " ",quote = FALSE,row.names = FALSE,
64
              col.names = FALSE)
65
}
66
67
# ----------------------------------------------------------------------
68
# Reads in the association results from GEMMA, and returns a data
69
# frame containing three columns: chromosome number ("chr"); base-pair
70
# position ("pos"); and the base-10 logarithm of the p-value ("log10p").
71
read.gemma.assoc <- function (file) {
72
  gwscan <- read.table(file,sep = "\t",header = TRUE,check.names = FALSE,
73
                  quote = "",stringsAsFactors = FALSE)
74
  rownames(gwscan) <- gwscan$rs
75
  gwscan           <- gwscan[c("chr","ps","p_lrt")]
76
  gwscan           <- transform(gwscan,p_lrt = -log10(p_lrt))
77
  colnames(gwscan) <- c("chr","pos","log10p")
78
  return(gwscan)
79
}
80
81
# ----------------------------------------------------------------------
82
# This function maps QTLs using GEMMA, writing all the files required
83
# by GEMMA to the directory specified by "gemmadir". The QTLs are
84
# mapped separately for each chromosome, in which the kinship matrix
85
# is computed using all markers except the markers on the given
86
# chromosome.
87
run.gemma <- function (phenotype, covariates, pheno, geno, map,
88
                       gemmadir, gemma.exe, chromosomes = NULL) {
89
90
  # Give summary of analysis.
91
  cat("Mapping QTLs for",phenotype,"in",nrow(pheno),"mice, ")
92
  if (!is.null(covariates)) {
93
    cat("controlling for ",paste(covariates,collapse=" + "),".\n",sep="")
94
  } else {
95
    cat("with no covariates included.\n")
96
  }
97
98
  # Write the phenotype and covariate data to separate files.
99
  cat("Writing phenotype and covariate data to file.\n")
100
  write.gemma.pheno(paste0(gemmadir,"/pheno.txt"),phenotype,pheno)
101
  write.gemma.covariates(paste0(gemmadir,"/covariates.txt"),covariates,pheno) 
102
103
  # We will map QTLs on these chromosomes.
104
  if (is.null(chromosomes))  
105
    chromosomes  <- levels(map$chr)
106
  
107
  # Repeat for each chromosome.
108
  scans        <- vector("list",length(chromosomes))
109
  names(scans) <- chromosomes
110
111
  for (chr in chromosomes) {
112
    # Compute the kinship matrix using all markers that are *not* on
113
    # the chromosome.
114
    cat("Mapping QTLs on chromosome ",chr,".\n",sep="")
115
    cat(" * Computing kinship matrix.\n")
116
    markers <- which(map$chr != chr)
117
    K <- tcrossprod(center.columns(geno[,markers])) / length(markers)
118
119
    # Save the kinship matrix to a text file.
120
    cat(" * Writing kinship matrix to file.\n")
121
    write.table(round(K,digits = 6),paste0(gemmadir,"/kinship.txt"),sep = " ",
122
                quote = FALSE,row.names = FALSE,col.names = FALSE)
123
  
124
    # Write out the mean genotypes and map information for all markers
125
    # on the chromosome.
126
    markers <- which(map$chr == chr)
127
    cat(" * Writing to file genotypes for ",length(markers),
128
        " markers on chromosome ",chr,".\n",sep="")
129
    write.gemma.geno(paste0(gemmadir,"/geno.txt"),geno[,markers],map[markers,])
130
    cat(" * Writing genetic map for",length(markers),
131
        "markers on chromosome",chr,"to file.\n")
132
    write.gemma.map(paste0(gemmadir,"/map.txt"),map[markers,])    
133
134
      
135
    # Set the local directory to the location of the GEMMA files.
136
    srcdir <- getwd()
137
    setwd(gemmadir)
138
      
139
    # Now we are finally ready to run GEMMA for all markers on the
140
    # chromosome using the kinship matrix computed using all the
141
    # markers *not* on the chromosome.
142
    cat(" * Computing p-values for ",length(markers),
143
        " markers on chromosome ",chr,".\n",sep="")
144
145
    system(paste(gemma.exe,"-g geno.txt -a map.txt -p pheno.txt",
146
                 "-c covariates.txt -k kinship.txt -notsnp -lmm 2",
147
                 "-lmin 0.01 -lmax 100"),
148
           ignore.stdout = TRUE)
149
      
150
    # Restore the working directory.
151
    setwd(srcdir)
152
      
153
    # Load the results of the GEMMA association analysis.
154
    scans[[chr]] <-
155
      read.gemma.assoc(paste0(gemmadir,"/output/result.assoc.txt"))
156
  }
157
158
159
  
160
  # Merge the mapping results from all chromosomes into a single table.
161
  gwscan           <- do.call(rbind,scans)
162
  rownames(gwscan) <- do.call(c,lapply(scans,rownames))
163
  class(gwscan)    <- c("scanone","data.frame")
164
165
  # Return the genome-wide scan.
166
  return(gwscan)
167
}
168
169
# ----------------------------------------------------------------------
170
# This function maps QTLs using GEMMA without the "realized
171
# relatedness" matrix to account for population structure.
172
run.gemma.norr <- function (phenotype, covariates, pheno, geno, map,
173
                            gemmadir, gemma.exe) {
174
175
176
  # Give summary of analysis.
177
  cat("Mapping QTLs for",phenotype,"in",nrow(pheno),"mice, ")
178
  if (!is.null(covariates)) {
179
    cat("controlling for ",paste(covariates,collapse=" + "),".\n",sep="")
180
  } else {
181
    cat("with no covariates included.\n")
182
  }
183
184
  # Write the phenotype and covariate data to separate files.
185
  cat("Writing phenotype and covariate data to file.\n")
186
  write.gemma.pheno(paste0(gemmadir,"/pheno.txt"),phenotype,pheno)
187
  write.gemma.covariates(paste0(gemmadir,"/covariates.txt"),covariates,pheno) 
188
189
  # Write out the mean genotypes and map information for all markers.
190
  cat("Writing SNP and genotype data to file.\n")
191
  write.gemma.map(paste0(gemmadir,"/map.txt"),map)
192
  write.gemma.geno(paste0(gemmadir,"/geno.txt"),geno,map)
193
  
194
  # Write out the kinship matrix to file.
195
  cat("Writing identity kinship matrix to file.\n");
196
  write.table(diag(nrow(pheno)),paste0(gemmadir,"/kinship.txt"),
197
              sep = " ",quote = FALSE,row.names = FALSE,
198
              col.names = FALSE)
199
  # cat("Writing full kinship matrix to file.\n");
200
  # K <- tcrossprod(center.columns(geno)) / nrow(map)
201
  # write.table(round(K,digits = 6),paste0(gemmadir,"/kinship.txt"),
202
  #             sep = " ",quote = FALSE,row.names = FALSE,
203
  #             col.names = FALSE)
204
205
  # Set the local directory to the location of the GEMMA files.
206
  srcdir <- getwd()
207
  setwd(gemmadir)
208
  # Now we are finally ready to run GEMMA for all markers.
209
  cat("Computing p-values for",nrow(map),"candidate markers.\n")
210
  system(paste(gemma.exe,"-g geno.txt -a map.txt -p pheno.txt",
211
               "-c covariates.txt -k kinship.txt -notsnp -lmm 2",
212
               "-lmin 0.01 -lmax 100"),
213
         ignore.stdout = TRUE)
214
215
  # Restore the working directory.
216
  setwd(srcdir)
217
218
  # Load the results of the GEMMA association analysis.
219
  gwscan <- read.gemma.assoc(paste0(gemmadir,"/output/result.assoc.txt"))
220
  class(gwscan) <- c("scanone","data.frame")
221
222
  # Restore the working directory.
223
  setwd(srcdir)
224
225
  # Return the genome-wide scan.
226
  return(gwscan)
227
}