Switch to side-by-side view

--- a
+++ b/research_paper_code/src/gemma.R
@@ -0,0 +1,227 @@
+# SUMMARY
+# -------
+# This file defines some functions that I use for QTL mapping in
+# GEMMA. Here is an overview of the functions defined in this file:
+#
+#   write.gemma.pheno(file,phenotype,pheno)
+#   write.gemma.covariates(file,covariates,pheno) 
+#   write.gemma.map(file,map)
+#   write.gemma.geno(file,geno,map)
+#   read.gemma.assoc(file)
+#   run.gemma(phenotype,covariates,pheno,geno,map,gemmadir,gemma.exe,
+#             chromosomes)
+#   run.gemma.norr(phenotype,covariates,pheno,geno,map,gemmadir,gemma.exe) 
+#
+# FUNCTION DEFINITIONS
+# ----------------------------------------------------------------------
+# Write the phenotype data to a file in the format used by GEMMA. Each
+# line of the file contains one phenotype observation.
+write.gemma.pheno <- function (file, phenotype, pheno) {
+  y <- pheno[phenotype]
+  if (is.numeric(y))
+    y <- round(y,digits = 6)
+  write.table(y,file,quote = FALSE,row.names = FALSE,col.names = FALSE)
+}
+
+# ----------------------------------------------------------------------
+# Write the covariate data to a file in the format used by GEMMA. Each
+# line corresponds to a sample. We must include an additional
+# covariate for the intercept.
+write.gemma.covariates <- function (file, covariates, pheno) {
+  if (is.null(covariates)) {
+    write.table(data.frame(rep(1,nrow(pheno))),file,sep = " ",
+                quote = FALSE,row.names = FALSE,col.names = FALSE)
+  } else {
+    write.table(cbind(1,data.frame(lapply(pheno[covariates],function (x) {
+      if (is.numeric(x))
+        round(x,digits=6)
+      else
+        x
+    }))),file,sep = " ",quote = FALSE,row.names = FALSE,col.names = FALSE)
+  }
+}
+
+# ----------------------------------------------------------------------
+# Write the SNP information to a space-delimited text file in the
+# format used by GEMMA. This file contains one line per SNP, with
+# three columns: (1) SNP label, (2) base-pair position, (3)
+# chromosome.
+write.gemma.map <- function (file, map)
+  write.table(map[c("id","pos","chr")],file,sep = " ",quote = FALSE,
+              row.names = FALSE,col.names = FALSE)
+
+# ----------------------------------------------------------------------
+# Store the mean genotypes as a space-delimited text file in the
+# format used by GEMMA, in which we have one row per SNP, and one
+# column per sample. The first three column give the SNP label, and
+# the two alleles.
+write.gemma.geno <- function (file, geno, map) {
+  geno <- t(geno)
+  geno <- as.data.frame(geno,check.names = FALSE)
+  geno <- round(geno,digits = 3)
+  geno <- cbind(map[c("id","ref","alt")],geno)
+  write.table(geno,file,sep = " ",quote = FALSE,row.names = FALSE,
+              col.names = FALSE)
+}
+
+# ----------------------------------------------------------------------
+# Reads in the association results from GEMMA, and returns a data
+# frame containing three columns: chromosome number ("chr"); base-pair
+# position ("pos"); and the base-10 logarithm of the p-value ("log10p").
+read.gemma.assoc <- function (file) {
+  gwscan <- read.table(file,sep = "\t",header = TRUE,check.names = FALSE,
+                  quote = "",stringsAsFactors = FALSE)
+  rownames(gwscan) <- gwscan$rs
+  gwscan           <- gwscan[c("chr","ps","p_lrt")]
+  gwscan           <- transform(gwscan,p_lrt = -log10(p_lrt))
+  colnames(gwscan) <- c("chr","pos","log10p")
+  return(gwscan)
+}
+
+# ----------------------------------------------------------------------
+# This function maps QTLs using GEMMA, writing all the files required
+# by GEMMA to the directory specified by "gemmadir". The QTLs are
+# mapped separately for each chromosome, in which the kinship matrix
+# is computed using all markers except the markers on the given
+# chromosome.
+run.gemma <- function (phenotype, covariates, pheno, geno, map,
+                       gemmadir, gemma.exe, chromosomes = NULL) {
+
+  # Give summary of analysis.
+  cat("Mapping QTLs for",phenotype,"in",nrow(pheno),"mice, ")
+  if (!is.null(covariates)) {
+    cat("controlling for ",paste(covariates,collapse=" + "),".\n",sep="")
+  } else {
+    cat("with no covariates included.\n")
+  }
+
+  # Write the phenotype and covariate data to separate files.
+  cat("Writing phenotype and covariate data to file.\n")
+  write.gemma.pheno(paste0(gemmadir,"/pheno.txt"),phenotype,pheno)
+  write.gemma.covariates(paste0(gemmadir,"/covariates.txt"),covariates,pheno) 
+
+  # We will map QTLs on these chromosomes.
+  if (is.null(chromosomes))  
+    chromosomes  <- levels(map$chr)
+  
+  # Repeat for each chromosome.
+  scans        <- vector("list",length(chromosomes))
+  names(scans) <- chromosomes
+
+  for (chr in chromosomes) {
+    # Compute the kinship matrix using all markers that are *not* on
+    # the chromosome.
+    cat("Mapping QTLs on chromosome ",chr,".\n",sep="")
+    cat(" * Computing kinship matrix.\n")
+    markers <- which(map$chr != chr)
+    K <- tcrossprod(center.columns(geno[,markers])) / length(markers)
+
+    # Save the kinship matrix to a text file.
+    cat(" * Writing kinship matrix to file.\n")
+    write.table(round(K,digits = 6),paste0(gemmadir,"/kinship.txt"),sep = " ",
+                quote = FALSE,row.names = FALSE,col.names = FALSE)
+  
+    # Write out the mean genotypes and map information for all markers
+    # on the chromosome.
+    markers <- which(map$chr == chr)
+    cat(" * Writing to file genotypes for ",length(markers),
+        " markers on chromosome ",chr,".\n",sep="")
+    write.gemma.geno(paste0(gemmadir,"/geno.txt"),geno[,markers],map[markers,])
+    cat(" * Writing genetic map for",length(markers),
+        "markers on chromosome",chr,"to file.\n")
+    write.gemma.map(paste0(gemmadir,"/map.txt"),map[markers,])    
+
+      
+    # Set the local directory to the location of the GEMMA files.
+    srcdir <- getwd()
+    setwd(gemmadir)
+      
+    # Now we are finally ready to run GEMMA for all markers on the
+    # chromosome using the kinship matrix computed using all the
+    # markers *not* on the chromosome.
+    cat(" * Computing p-values for ",length(markers),
+        " markers on chromosome ",chr,".\n",sep="")
+
+    system(paste(gemma.exe,"-g geno.txt -a map.txt -p pheno.txt",
+                 "-c covariates.txt -k kinship.txt -notsnp -lmm 2",
+                 "-lmin 0.01 -lmax 100"),
+           ignore.stdout = TRUE)
+      
+    # Restore the working directory.
+    setwd(srcdir)
+      
+    # Load the results of the GEMMA association analysis.
+    scans[[chr]] <-
+      read.gemma.assoc(paste0(gemmadir,"/output/result.assoc.txt"))
+  }
+
+
+  
+  # Merge the mapping results from all chromosomes into a single table.
+  gwscan           <- do.call(rbind,scans)
+  rownames(gwscan) <- do.call(c,lapply(scans,rownames))
+  class(gwscan)    <- c("scanone","data.frame")
+
+  # Return the genome-wide scan.
+  return(gwscan)
+}
+
+# ----------------------------------------------------------------------
+# This function maps QTLs using GEMMA without the "realized
+# relatedness" matrix to account for population structure.
+run.gemma.norr <- function (phenotype, covariates, pheno, geno, map,
+                            gemmadir, gemma.exe) {
+
+
+  # Give summary of analysis.
+  cat("Mapping QTLs for",phenotype,"in",nrow(pheno),"mice, ")
+  if (!is.null(covariates)) {
+    cat("controlling for ",paste(covariates,collapse=" + "),".\n",sep="")
+  } else {
+    cat("with no covariates included.\n")
+  }
+
+  # Write the phenotype and covariate data to separate files.
+  cat("Writing phenotype and covariate data to file.\n")
+  write.gemma.pheno(paste0(gemmadir,"/pheno.txt"),phenotype,pheno)
+  write.gemma.covariates(paste0(gemmadir,"/covariates.txt"),covariates,pheno) 
+
+  # Write out the mean genotypes and map information for all markers.
+  cat("Writing SNP and genotype data to file.\n")
+  write.gemma.map(paste0(gemmadir,"/map.txt"),map)
+  write.gemma.geno(paste0(gemmadir,"/geno.txt"),geno,map)
+  
+  # Write out the kinship matrix to file.
+  cat("Writing identity kinship matrix to file.\n");
+  write.table(diag(nrow(pheno)),paste0(gemmadir,"/kinship.txt"),
+              sep = " ",quote = FALSE,row.names = FALSE,
+              col.names = FALSE)
+  # cat("Writing full kinship matrix to file.\n");
+  # K <- tcrossprod(center.columns(geno)) / nrow(map)
+  # write.table(round(K,digits = 6),paste0(gemmadir,"/kinship.txt"),
+  #             sep = " ",quote = FALSE,row.names = FALSE,
+  #             col.names = FALSE)
+
+  # Set the local directory to the location of the GEMMA files.
+  srcdir <- getwd()
+  setwd(gemmadir)
+  # Now we are finally ready to run GEMMA for all markers.
+  cat("Computing p-values for",nrow(map),"candidate markers.\n")
+  system(paste(gemma.exe,"-g geno.txt -a map.txt -p pheno.txt",
+               "-c covariates.txt -k kinship.txt -notsnp -lmm 2",
+               "-lmin 0.01 -lmax 100"),
+         ignore.stdout = TRUE)
+
+  # Restore the working directory.
+  setwd(srcdir)
+
+  # Load the results of the GEMMA association analysis.
+  gwscan <- read.gemma.assoc(paste0(gemmadir,"/output/result.assoc.txt"))
+  class(gwscan) <- c("scanone","data.frame")
+
+  # Restore the working directory.
+  setwd(srcdir)
+
+  # Return the genome-wide scan.
+  return(gwscan)
+}