|
a |
|
b/research_paper_code/src/read.data.R |
|
|
# SUMMARY
# -------
# This file contains several functions for reading the QTL experiment
# data from text files. Here is an overview of the functions defined
# in this file:
#
#   read.pheno(file)
#   read.map(file)
#   read.geno.dosage(file,n)
#
# FUNCTION DEFINITIONS
# ----------------------------------------------------------------------
# Loads the phenotype data stored in a CSV file.
read.pheno <- function (file) {

  # Reads the phenotype table from a CSV file and fixes up the column
  # types. Input argument "file" is passed as-is to read.csv; the
  # return value is a data frame with one row per line of data.

  # Categorical columns. Each of these becomes a factor whose levels
  # are the distinct values found in the file.
  factor.cols <- c("FCbox","PPIbox","methcage","methcycle","discard",
                   "mixup","earpunch","abnormalbone","experimenters")

  # Columns stored in double precision: fasting glucose, plus the
  # distance measurements recorded on each of the three days (D1, D2,
  # D3), in 15- and 30-minute windows and in 5-minute bins.
  days        <- paste0("D",1:3)
  double.cols <- c("fastglucose",
                   paste0(rep(days,each = 3),"totaldist",
                          c("0to15","15to30","0to30")),
                   paste0(rep(days,each = 6),"TOTDIST",seq(5,30,by = 5)))

  # Load the table. Quoting is turned off, text following a "#" is
  # treated as a comment, and strings are kept as character vectors.
  dat <- read.csv(file,header = TRUE,quote = "",comment.char = "#",
                  check.names = FALSE,stringsAsFactors = FALSE)

  # Sample ids are kept as strings. The testing round is a factor with
  # the fixed levels SW1, SW2, ..., SW25; any other value becomes NA.
  dat <- transform(dat,
                   id    = as.character(id),
                   round = factor(round,paste0("SW",1:25)))

  # Apply the remaining conversions one group of columns at a time.
  dat[factor.cols] <- lapply(dat[factor.cols],factor)
  dat[double.cols] <- lapply(dat[double.cols],as.double)

  # Return the phenotype data table.
  return(dat)
}

# ----------------------------------------------------------------------
# Returns a data frame containing the marker data stored in a text
# file. The chromosomes and alleles are converted to factors manually
# to make sure that the factor levels are ordered properly:
# chromosomes 1 through 19, and bases A, T, G, C.
read.map <- function (file) {

  # Reads the marker map from a space-delimited text file with a
  # header line; text following a "#" is treated as a comment. The
  # file must provide the columns "chr", "ref" and "alt"; any other
  # columns are returned as read. The return value is a data frame in
  # which
  #
  #   chr is a factor with levels 1, 2, ..., 19;
  #   ref and alt are factors with levels A, T, G, C.
  #
  # Values that are not among these levels become NA.
  bases <- c("A","T","G","C")

  # Force the allele columns to be read as text. Left to automatic
  # type detection, a column in which every allele is "T" is read as
  # the logical value TRUE, and every entry then becomes NA when it is
  # matched against the bases.
  map <- read.table(file,sep = " ",header = TRUE,comment.char = "#",
                    stringsAsFactors = FALSE,
                    colClasses = c(ref = "character",alt = "character"))
  return(transform(map,
                   chr = factor(chr,1:19),
                   ref = factor(ref,bases),
                   alt = factor(alt,bases)))
}

# ----------------------------------------------------------------------
# Returns a list object containing (1) a factor "discard" specifying
# samples that are from potentially mislabeled flow samples, and (2)
# "geno", a matrix of genotype "dosages" (expected allele counts) with
# one row per sample and one column per SNP. Note that the input
# argument n is the number of dosage columns in the file (the columns
# after the first two); that is, the number of SNPs, not the number of
# samples.
read.geno.dosage <- function (file, n) {

  # Reads a space-delimited genotype dosage file with a header line.
  #
  # Input arguments:
  #
  #   file  passed as-is to fread.
  #   n     number of dosage columns that follow the first two
  #         columns. The first two columns are read as character and
  #         the next n columns as double, so n must match the file.
  #
  # Returns a list with two elements:
  #
  #   discard  the "discard" column, converted to a factor.
  #   geno     a matrix holding every column except the first two,
  #            with the row names set to the "id" column.
  #
  # Ask fread for a plain data frame directly instead of overwriting
  # the class of the data.table it returns by default; that shortcut
  # leaves data.table's internal attributes attached to the object.
  out <- fread(file,sep = " ",header = TRUE,showProgress = FALSE,
               colClasses = c("character","character",rep("double",n)),
               data.table = FALSE)
  rownames(out) <- out$id
  return(list(discard = factor(out$discard),
              geno    = as.matrix(out[-(1:2)])))
}