[befbfc]: / research_paper_code / src / read.data.R

Download this file

100 lines (90 with data), 4.5 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# SUMMARY
# -------
# This file contains several functions for reading the QTL experiment
# data from text files. Here is an overview of the functions defined
# in this file:
#
# read.pheno(file)
# read.map(file)
# read.geno.dosage(file,n)
#
# FUNCTION DEFINITIONS
# ----------------------------------------------------------------------
# Loads the phenotype data stored in a CSV file.
#
# The file must have a header line. Lines beginning with "#" are
# treated as comments, and quote characters are not interpreted.
#
# Arguments:
#   file  Path to the CSV file (passed to read.csv).
#
# Returns a data frame in which:
#   - "id" is a character vector, kept exactly as written in the file;
#   - "round" is a factor with levels SW1, ..., SW25 (any other value
#     becomes NA);
#   - FCbox, PPIbox, methcage, methcycle, discard, mixup, earpunch,
#     abnormalbone and experimenters are factors, with levels taken
#     from the data;
#   - fastglucose and the D{1,2,3}totaldist* and D{1,2,3}TOTDIST*
#     columns are double precision.
# All other columns are left as read.csv returns them.
#
# Stops with an error naming the missing columns if any of the columns
# listed above is not found in the file.
read.pheno <- function (file) {

  # Columns that are converted to factors.
  factor.cols <- c("FCbox","PPIbox","methcage","methcycle","discard",
                   "mixup","earpunch","abnormalbone","experimenters")

  # Columns that are converted to double precision: fasting glucose,
  # plus, for each of days D1, D2 and D3, the distance totals
  # (e.g., D1totaldist0to15) and the 5-minute bins (e.g., D1TOTDIST5).
  days        <- paste0("D",1:3)
  dist.cols   <- c("totaldist0to15","totaldist15to30","totaldist0to30",
                   paste0("TOTDIST",seq(5,30,5)))
  double.cols <- c("fastglucose",as.vector(outer(days,dist.cols,paste0)))

  # Read in the phenotype data from the CSV file. The ids are read as
  # character so that they are kept verbatim; letting read.csv guess
  # the type would parse them as numbers first, which drops leading
  # zeros.
  pheno <- read.csv(file,quote = "",header = TRUE,check.names = FALSE,
                    stringsAsFactors = FALSE,comment.char = "#",
                    colClasses = c(id = "character"))

  # Fail early, and clearly, if an expected column is absent.
  absent <- setdiff(c("id","round",factor.cols,double.cols),names(pheno))
  if (length(absent) > 0)
    stop("Phenotype file is missing required column(s): ",
         paste(absent,collapse = ", "),call. = FALSE)

  # Convert some of the columns to factors.
  pheno[["round"]]   <- factor(pheno[["round"]],paste0("SW",1:25))
  pheno[factor.cols] <- lapply(pheno[factor.cols],factor)

  # Convert some of the columns to double precision.
  pheno[double.cols] <- lapply(pheno[double.cols],as.double)

  # Rebuild the table with data.frame so that the column names are
  # made syntactic and unique. This keeps the returned column names the
  # same as when the conversions were done with transform(), which
  # rebuilds the table in this way.
  pheno <- data.frame(pheno)

  # Return the phenotype data table.
  return(pheno)
}
# ----------------------------------------------------------------------
# Returns a data frame containing the marker data stored in a
# space-delimited text file with a header line. Lines beginning with
# "#" are treated as comments.
#
# The chromosomes and alleles are converted to factors with explicitly
# specified levels so that they are ordered properly in the factors:
# "chr" has levels 1, ..., 19, and "ref" and "alt" have levels A, T,
# G, C. Values not among these levels become NA.
#
# Arguments:
#   file  Path to the marker file (passed to read.table).
#
# Stops with an error naming the missing columns if "chr", "ref" or
# "alt" is not found in the file.
read.map <- function (file) {
  bases <- c("A","T","G","C")

  # The alleles are read as character. Otherwise, an allele column
  # containing only "T" would be interpreted by read.table as logical
  # (TRUE), and every entry would become NA in the factor.
  map <- read.table(file,sep = " ",header = TRUE,comment.char = "#",
                    stringsAsFactors = FALSE,
                    colClasses = c(ref = "character",alt = "character"))

  # Fail early, and clearly, if an expected column is absent.
  absent <- setdiff(c("chr","ref","alt"),names(map))
  if (length(absent) > 0)
    stop("Marker file is missing required column(s): ",
         paste(absent,collapse = ", "),call. = FALSE)

  map[["chr"]] <- factor(map[["chr"]],1:19)
  map[["ref"]] <- factor(map[["ref"]],bases)
  map[["alt"]] <- factor(map[["alt"]],bases)
  return(map)
}
# ----------------------------------------------------------------------
# Reads genotype "dosages" (expected allele counts) from a
# space-delimited text file with a header line.
#
# The first two columns of the file must be "id" and "discard"; both
# are read as character. They are followed by n dosage columns, which
# are read as double precision.
#
# Arguments:
#   file  Path to the genotype dosage file (passed to fread).
#   n     Number of dosage columns in the file. NOTE(review): since
#         each row of the file is labeled by an "id" and a "discard"
#         value, n looks like the number of SNPs rather than the
#         number of samples; confirm against the callers.
#
# Returns a list object containing (1) a factor "discard" specifying
# samples that are from potentially mislabeled flow samples, and (2) a
# matrix "geno" of genotype dosages with one row per line of data in
# the file (row names are taken from the "id" column) and one column
# per dosage column.
#
# Stops with an error if the "id" and "discard" columns are not the
# first two columns of the file.
read.geno.dosage <- function (file, n) {

  # Ask fread for a plain data frame rather than overwriting the class
  # of the data.table it returns by default.
  out <- fread(file,sep = " ",header = TRUE,showProgress = FALSE,
               colClasses = c("character","character",rep("double",n)),
               data.table = FALSE)

  # Without these two columns, the row names and the "discard" factor
  # would silently come out empty.
  if (!all(c("id","discard") %in% names(out)[1:2]))
    stop("The first two columns of the genotype file must be \"id\" ",
         "and \"discard\"",call. = FALSE)

  rownames(out) <- out[["id"]]

  # Everything after the first two columns is a dosage column.
  return(list(discard = factor(out[["discard"]]),
              geno    = as.matrix(out[-(1:2)])))
}