--- a +++ b/research_paper_code/src/gemma.R @@ -0,0 +1,227 @@ +# SUMMARY +# ------- +# This file defines some functions that I use for QTL mapping in +# GEMMA. Here is an overview of the functions defined in this file: +# +# write.gemma.pheno(file,phenotype,pheno) +# write.gemma.covariates(file,covariates,pheno) +# write.gemma.map(file,map) +# write.gemma.geno(file,geno,map) +# read.gemma.assoc(file) +# run.gemma(phenotype,covariates,pheno,geno,map,gemmadir,gemma.exe, +# chromosomes) +# run.gemma.norr(phenotype,covariates,pheno,geno,map,gemmadir,gemma.exe) +# +# FUNCTION DEFINITIONS +# ---------------------------------------------------------------------- +# Write the phenotype data to a file in the format used by GEMMA. Each +# line of the file contains one phenotype observation. +write.gemma.pheno <- function (file, phenotype, pheno) { + y <- pheno[phenotype] + if (is.numeric(y)) + y <- round(y,digits = 6) + write.table(y,file,quote = FALSE,row.names = FALSE,col.names = FALSE) +} + +# ---------------------------------------------------------------------- +# Write the covariate data to a file in the format used by GEMMA. Each +# line corresponds to a sample. We must include an additional +# covariate for the intercept. +write.gemma.covariates <- function (file, covariates, pheno) { + if (is.null(covariates)) { + write.table(data.frame(rep(1,nrow(pheno))),file,sep = " ", + quote = FALSE,row.names = FALSE,col.names = FALSE) + } else { + write.table(cbind(1,data.frame(lapply(pheno[covariates],function (x) { + if (is.numeric(x)) + round(x,digits=6) + else + x + }))),file,sep = " ",quote = FALSE,row.names = FALSE,col.names = FALSE) + } +} + +# ---------------------------------------------------------------------- +# Write the SNP information to a space-delimited text file in the +# format used by GEMMA. This file contains one line per SNP, with +# three columns: (1) SNP label, (2) base-pair position, (3) +# chromosome. +write.gemma.map <- function (file, map) + write.table(map[c("id","pos","chr")],file,sep = " ",quote = FALSE, + row.names = FALSE,col.names = FALSE) + +# ---------------------------------------------------------------------- +# Store the mean genotypes as a space-delimited text file in the +# format used by GEMMA, in which we have one row per SNP, and one +# column per sample. The first three column give the SNP label, and +# the two alleles. +write.gemma.geno <- function (file, geno, map) { + geno <- t(geno) + geno <- as.data.frame(geno,check.names = FALSE) + geno <- round(geno,digits = 3) + geno <- cbind(map[c("id","ref","alt")],geno) + write.table(geno,file,sep = " ",quote = FALSE,row.names = FALSE, + col.names = FALSE) +} + +# ---------------------------------------------------------------------- +# Reads in the association results from GEMMA, and returns a data +# frame containing three columns: chromosome number ("chr"); base-pair +# position ("pos"); and the base-10 logarithm of the p-value ("log10p"). +read.gemma.assoc <- function (file) { + gwscan <- read.table(file,sep = "\t",header = TRUE,check.names = FALSE, + quote = "",stringsAsFactors = FALSE) + rownames(gwscan) <- gwscan$rs + gwscan <- gwscan[c("chr","ps","p_lrt")] + gwscan <- transform(gwscan,p_lrt = -log10(p_lrt)) + colnames(gwscan) <- c("chr","pos","log10p") + return(gwscan) +} + +# ---------------------------------------------------------------------- +# This function maps QTLs using GEMMA, writing all the files required +# by GEMMA to the directory specified by "gemmadir". The QTLs are +# mapped separately for each chromosome, in which the kinship matrix +# is computed using all markers except the markers on the given +# chromosome. +run.gemma <- function (phenotype, covariates, pheno, geno, map, + gemmadir, gemma.exe, chromosomes = NULL) { + + # Give summary of analysis. + cat("Mapping QTLs for",phenotype,"in",nrow(pheno),"mice, ") + if (!is.null(covariates)) { + cat("controlling for ",paste(covariates,collapse=" + "),".\n",sep="") + } else { + cat("with no covariates included.\n") + } + + # Write the phenotype and covariate data to separate files. + cat("Writing phenotype and covariate data to file.\n") + write.gemma.pheno(paste0(gemmadir,"/pheno.txt"),phenotype,pheno) + write.gemma.covariates(paste0(gemmadir,"/covariates.txt"),covariates,pheno) + + # We will map QTLs on these chromosomes. + if (is.null(chromosomes)) + chromosomes <- levels(map$chr) + + # Repeat for each chromosome. + scans <- vector("list",length(chromosomes)) + names(scans) <- chromosomes + + for (chr in chromosomes) { + # Compute the kinship matrix using all markers that are *not* on + # the chromosome. + cat("Mapping QTLs on chromosome ",chr,".\n",sep="") + cat(" * Computing kinship matrix.\n") + markers <- which(map$chr != chr) + K <- tcrossprod(center.columns(geno[,markers])) / length(markers) + + # Save the kinship matrix to a text file. + cat(" * Writing kinship matrix to file.\n") + write.table(round(K,digits = 6),paste0(gemmadir,"/kinship.txt"),sep = " ", + quote = FALSE,row.names = FALSE,col.names = FALSE) + + # Write out the mean genotypes and map information for all markers + # on the chromosome. + markers <- which(map$chr == chr) + cat(" * Writing to file genotypes for ",length(markers), + " markers on chromosome ",chr,".\n",sep="") + write.gemma.geno(paste0(gemmadir,"/geno.txt"),geno[,markers],map[markers,]) + cat(" * Writing genetic map for",length(markers), + "markers on chromosome",chr,"to file.\n") + write.gemma.map(paste0(gemmadir,"/map.txt"),map[markers,]) + + + # Set the local directory to the location of the GEMMA files. + srcdir <- getwd() + setwd(gemmadir) + + # Now we are finally ready to run GEMMA for all markers on the + # chromosome using the kinship matrix computed using all the + # markers *not* on the chromosome. + cat(" * Computing p-values for ",length(markers), + " markers on chromosome ",chr,".\n",sep="") + + system(paste(gemma.exe,"-g geno.txt -a map.txt -p pheno.txt", + "-c covariates.txt -k kinship.txt -notsnp -lmm 2", + "-lmin 0.01 -lmax 100"), + ignore.stdout = TRUE) + + # Restore the working directory. + setwd(srcdir) + + # Load the results of the GEMMA association analysis. + scans[[chr]] <- + read.gemma.assoc(paste0(gemmadir,"/output/result.assoc.txt")) + } + + + + # Merge the mapping results from all chromosomes into a single table. + gwscan <- do.call(rbind,scans) + rownames(gwscan) <- do.call(c,lapply(scans,rownames)) + class(gwscan) <- c("scanone","data.frame") + + # Return the genome-wide scan. + return(gwscan) +} + +# ---------------------------------------------------------------------- +# This function maps QTLs using GEMMA without the "realized +# relatedness" matrix to account for population structure. +run.gemma.norr <- function (phenotype, covariates, pheno, geno, map, + gemmadir, gemma.exe) { + + + # Give summary of analysis. + cat("Mapping QTLs for",phenotype,"in",nrow(pheno),"mice, ") + if (!is.null(covariates)) { + cat("controlling for ",paste(covariates,collapse=" + "),".\n",sep="") + } else { + cat("with no covariates included.\n") + } + + # Write the phenotype and covariate data to separate files. + cat("Writing phenotype and covariate data to file.\n") + write.gemma.pheno(paste0(gemmadir,"/pheno.txt"),phenotype,pheno) + write.gemma.covariates(paste0(gemmadir,"/covariates.txt"),covariates,pheno) + + # Write out the mean genotypes and map information for all markers. + cat("Writing SNP and genotype data to file.\n") + write.gemma.map(paste0(gemmadir,"/map.txt"),map) + write.gemma.geno(paste0(gemmadir,"/geno.txt"),geno,map) + + # Write out the kinship matrix to file. + cat("Writing identity kinship matrix to file.\n"); + write.table(diag(nrow(pheno)),paste0(gemmadir,"/kinship.txt"), + sep = " ",quote = FALSE,row.names = FALSE, + col.names = FALSE) + # cat("Writing full kinship matrix to file.\n"); + # K <- tcrossprod(center.columns(geno)) / nrow(map) + # write.table(round(K,digits = 6),paste0(gemmadir,"/kinship.txt"), + # sep = " ",quote = FALSE,row.names = FALSE, + # col.names = FALSE) + + # Set the local directory to the location of the GEMMA files. + srcdir <- getwd() + setwd(gemmadir) + # Now we are finally ready to run GEMMA for all markers. + cat("Computing p-values for",nrow(map),"candidate markers.\n") + system(paste(gemma.exe,"-g geno.txt -a map.txt -p pheno.txt", + "-c covariates.txt -k kinship.txt -notsnp -lmm 2", + "-lmin 0.01 -lmax 100"), + ignore.stdout = TRUE) + + # Restore the working directory. + setwd(srcdir) + + # Load the results of the GEMMA association analysis. + gwscan <- read.gemma.assoc(paste0(gemmadir,"/output/result.assoc.txt")) + class(gwscan) <- c("scanone","data.frame") + + # Restore the working directory. + setwd(srcdir) + + # Return the genome-wide scan. + return(gwscan) +}