|
a |
|
b/lib/handy.R |
|
|
1 |
################################################################################ |
|
|
2 |
### VARIABLE DEFINITION ###################################################### |
|
|
3 |
################################################################################ |
|
|
4 |
default.logfile.name <- 'log.txt' |
|
|
5 |
|
|
|
6 |
################################################################################ |
|
|
7 |
### DATA FRAMES ############################################################## |
|
|
8 |
################################################################################ |
|
|
9 |
|
|
|
10 |
sample.df <- function(df, size = 1, replace = FALSE, prob = NULL) { |
|
|
11 |
# Samples rows from a data frame. |
|
|
12 |
# |
|
|
13 |
# Args: |
|
|
14 |
# df: A data frame. |
|
|
15 |
# size, replace, prob: Arguments from the sample function. |
|
|
16 |
# |
|
|
17 |
# Returns: |
|
|
18 |
# size rows from this data frame (default 1), with or without replacement |
|
|
19 |
# and with an optional probability vector. |
|
|
20 |
df[sample(nrow(df), size, replace, prob), , drop = FALSE] |
|
|
21 |
} |
|
|
22 |
|
|
|
23 |
bootstrapSampleDf <- function(df) { |
|
|
24 |
# Quick wrapper function for simple bootstrap sampling of a data frame. |
|
|
25 |
# |
|
|
26 |
# Args: |
|
|
27 |
# df: A data frame. |
|
|
28 |
# |
|
|
29 |
# Returns: |
|
|
30 |
# A data frame with the same number of rows as the original, but randomly |
|
|
31 |
# sampled with replacement. |
|
|
32 |
sample.df(df, size = nrow(df), replace = TRUE) |
|
|
33 |
} |
|
|
34 |
|
|
|
35 |
withoutCols <- function(df, cols) { |
|
|
36 |
# Returns a vector to allow certain columns to be excluded from a |
|
|
37 |
# data frame. The case where the columns being excluded are referred to |
|
|
38 |
# numerically is trivial, but is included as well for generality. |
|
|
39 |
# |
|
|
40 |
# Args: |
|
|
41 |
# df: a data frame. |
|
|
42 |
# cols: the columns to be excluded, as a vector or scalar of number(s) or |
|
|
43 |
# name(s). |
|
|
44 |
# |
|
|
45 |
# Returns: |
|
|
46 |
# In the text case, a vector of booleans; TRUE for columns to include. |
|
|
47 |
# In the numerical case, R understands df[-3,], so just minus the input. |
|
|
48 |
if(is.character(cols)) { return(!(names(df) %in% cols)) } |
|
|
49 |
else { return(-cols) } |
|
|
50 |
} |
|
|
51 |
|
|
|
52 |
explodeByCol <- function(df, cols, sep=',', regex = NULL, |
|
|
53 |
fixed = TRUE) { |
|
|
54 |
# If your data frame contains multiple values in a single column, this splits |
|
|
55 |
# multiple values across different rows, using either a separator character or |
|
|
56 |
# a regular expression. |
|
|
57 |
# |
|
|
58 |
# Args: |
|
|
59 |
# df: a data frame. |
|
|
60 |
# cols: one or more column(s) to explode by. |
|
|
61 |
# sep: the separator of the multiple values. |
|
|
62 |
# regex: a regular expression which matches the values you're looking for; |
|
|
63 |
# overrides sep. |
|
|
64 |
# fixed: for strsplit, this variable determines whether the string passed |
|
|
65 |
# in sep is fixed (TRUE) or a regular expression (FALSE). |
|
|
66 |
# |
|
|
67 |
# Returns: |
|
|
68 |
# A data frame with new rows, one for each value in the exploded column. |
|
|
69 |
|
|
|
70 |
# data frames fairly often come in with 'character' columns which are factors, |
|
|
71 |
# and these string-based functions can't handle that, so convert with a |
|
|
72 |
# warning |
|
|
73 |
|
|
|
74 |
# instantiate a list to contain the exploded output |
|
|
75 |
exploded <- list() |
|
|
76 |
n.exploded <- list() |
|
|
77 |
|
|
|
78 |
for(col in cols) { |
|
|
79 |
if(is.factor(df[, col])) { |
|
|
80 |
warning( |
|
|
81 |
paste0('Column ', col, ' is a factor, and has been coerced ', |
|
|
82 |
'to a character for exploding.') |
|
|
83 |
) |
|
|
84 |
df[, col] <- as.character(df[, col]) |
|
|
85 |
} else if(!is.character(df[, col])) { |
|
|
86 |
# if it's not character data, it won't work, so pass an error |
|
|
87 |
stop( |
|
|
88 |
paste0('Column ', col, ' passed to explodeByType should be character ', |
|
|
89 |
'data; it is of class ', class(df[, col]), '.' |
|
|
90 |
) |
|
|
91 |
) |
|
|
92 |
} |
|
|
93 |
# if regex is NULL, use the separator provided |
|
|
94 |
if(is.null(regex)) { |
|
|
95 |
exploded[col] <- list(strsplit(df[, col], sep, fixed = fixed)) |
|
|
96 |
# otherwise, use a regular expression to split the column |
|
|
97 |
} else { |
|
|
98 |
exploded[col] <- list(regmatches(df[, col], gregexpr(regex, df[, col]))) |
|
|
99 |
} |
|
|
100 |
# how many of each row should I create? ie 1,1,2,1,0 |
|
|
101 |
n.exploded[[col]] <- sapply(exploded[[col]], length) |
|
|
102 |
} |
|
|
103 |
|
|
|
104 |
# check the n.exploded values are the same for all columns |
|
|
105 |
if(!allSame(n.exploded)) { |
|
|
106 |
stop( |
|
|
107 |
paste0('The columns provided have inconsistent numbers of elements ', |
|
|
108 |
'after exploding.' |
|
|
109 |
) |
|
|
110 |
) |
|
|
111 |
} |
|
|
112 |
# turn the first element of n.exploded into a list of data frame row indices, |
|
|
113 |
# ie 1,2,3,3,4 |
|
|
114 |
n.exploded.rows <- rep(1:length(n.exploded[[cols[1]]]), n.exploded[[cols[1]]]) |
|
|
115 |
|
|
|
116 |
# take the data frame and repeat rows the relevant number of times |
|
|
117 |
df <- df[n.exploded.rows, ] |
|
|
118 |
# fill its exploded column(s) with the appropriate values |
|
|
119 |
for(col in cols) { |
|
|
120 |
df[, col] <- unlist(exploded[[col]]) |
|
|
121 |
} |
|
|
122 |
df |
|
|
123 |
} |
|
|
124 |
|
|
|
125 |
################################################################################ |
|
|
126 |
### CHARACTERS ############################################################### |
|
|
127 |
################################################################################ |
|
|
128 |
|
|
|
129 |
removeWhitespace <- function(x) { gsub("\\s","", x) } |
|
|
130 |
# For a character or vector of characters x, removes all spaces and line |
|
|
131 |
# breaks. |
|
|
132 |
# |
|
|
133 |
# Args: |
|
|
134 |
# x: A character or vector of characters. |
|
|
135 |
# |
|
|
136 |
# Returns: |
|
|
137 |
# The character with whitespace removed. |
|
|
138 |
|
|
|
139 |
pastePlus <- function(..., sep=" ", collapse = NULL, recycleZeroLength = TRUE) { |
|
|
140 |
# Version of the base R paste function which optionally returns nothing if any |
|
|
141 |
# of the ...s being concatenated have zero length. (Default behaviour is to |
|
|
142 |
# recycle them to "".) |
|
|
143 |
# |
|
|
144 |
# Args: |
|
|
145 |
# ..., sep, collapse: as paste in base R |
|
|
146 |
# ignoreZeroLength: |
|
|
147 |
# |
|
|
148 |
# Returns: |
|
|
149 |
# If any of the passed objects has zero length, NULL; otherwise, the |
|
|
150 |
# result of the paste function. |
|
|
151 |
if(!recycleZeroLength & |
|
|
152 |
any(lapply(list(...), length) == 0)) { |
|
|
153 |
return(NULL); |
|
|
154 |
} |
|
|
155 |
paste(..., sep = sep, collapse = collapse) |
|
|
156 |
} |
|
|
157 |
|
|
|
158 |
paste0Plus <- function(..., collapse = NULL, recycleZeroLength = TRUE) { |
|
|
159 |
pastePlus(..., sep="", collapse = collapse, |
|
|
160 |
recycleZeroLength = recycleZeroLength) |
|
|
161 |
} |
|
|
162 |
|
|
|
163 |
strPos <- function(..., fixed = TRUE) { |
|
|
164 |
# Wrapper function which returns the positions of the first occurrence of a |
|
|
165 |
# pattern in some text. Simplifies regexpr which returns a variety of things |
|
|
166 |
# other than simply the position. Defaults to fixed rather than regular |
|
|
167 |
# expression searching. |
|
|
168 |
# |
|
|
169 |
# Args: |
|
|
170 |
# ...: see grep in base R; usually (pattern, text) |
|
|
171 |
# fixed: Logical. If true, pattern is matched as-is. |
|
|
172 |
# |
|
|
173 |
# Returns: |
|
|
174 |
# The position of the first occurrence of the pattern in the text. |
|
|
175 |
regexpr(..., fixed = fixed)[1] |
|
|
176 |
} |
|
|
177 |
|
|
|
178 |
startsWithAny <- function(x, prefixes) { |
|
|
179 |
# Function extending startsWith for use with a vector of many prefixes. |
|
|
180 |
# |
|
|
181 |
# Args: |
|
|
182 |
# x: Vector of characters whose starts will be examined. |
|
|
183 |
# prefixes: Vector of characters which may be those starts. |
|
|
184 |
# |
|
|
185 |
# Returns: |
|
|
186 |
# A logical vector denoting whether a given string in x starts with any |
|
|
187 |
# of the prefixes provided. |
|
|
188 |
apply( |
|
|
189 |
# sapply startsWith over all prefixes, giving a table of logicals |
|
|
190 |
sapply( |
|
|
191 |
prefixes, |
|
|
192 |
function(prefix) { |
|
|
193 |
startsWith(x, prefix) |
|
|
194 |
} |
|
|
195 |
), |
|
|
196 |
# ...then, apply a massive logical 'or' over the rows of that table |
|
|
197 |
MARGIN = 1, FUN = any |
|
|
198 |
) |
|
|
199 |
} |
|
|
200 |
|
|
|
201 |
textAfter <- function(x, prefix) { |
|
|
202 |
# Find and return text from strings starting with a prefix, after that prefix. |
|
|
203 |
# |
|
|
204 |
# Args: |
|
|
205 |
# x: Vector of characters to examine. |
|
|
206 |
# prefix: Single character string to search for and then discard where |
|
|
207 |
# present. Accepts a vector as startsWith, but this would be a |
|
|
208 |
# slightly strange use-case. |
|
|
209 |
# |
|
|
210 |
# Returns: |
|
|
211 |
# A character vector of pieces of text which occur after the prefix |
|
|
212 |
# specified. eg textAfter(c('a1', 'a2', 'b1'), 'a') would return |
|
|
213 |
# c('1', '2'). |
|
|
214 |
i <- startsWith(x, prefix) |
|
|
215 |
substr(x[i], nchar(prefix) + 1, nchar(x[i])) |
|
|
216 |
} |
|
|
217 |
|
|
|
218 |
randomString <- function(l, characters = letters, disallowed = NULL) { |
|
|
219 |
# Generate a random string, with optional excision of disallowed sequences. |
|
|
220 |
# |
|
|
221 |
# Args: |
|
|
222 |
# l: The length of the string to generate in number of components |
|
|
223 |
# (usually single characters, see below) |
|
|
224 |
# characters: Either a string eg 'Argh' which will be split into individual |
|
|
225 |
# characters to act as string components, or a vector of components; |
|
|
226 |
# no check is made so these can be multi-character |
|
|
227 |
# disallowed: A vector of disallowed sequences. Defaults to NULL, which |
|
|
228 |
# lets anything through. |
|
|
229 |
# |
|
|
230 |
# Returns: |
|
|
231 |
# A string of length l picked from the characters provided, without any |
|
|
232 |
# disallowed strings. |
|
|
233 |
|
|
|
234 |
|
|
|
235 |
# if passed a single-element vector, it's almost certain they don't want a |
|
|
236 |
# single string repeated 'randomly' over and over, so split it into characters |
|
|
237 |
if(length(characters == 1)) { |
|
|
238 |
characters <- unlist(strsplit(characters, '')) |
|
|
239 |
} |
|
|
240 |
# generate a random string by sampling from characters |
|
|
241 |
random.string <- paste0( |
|
|
242 |
sample( |
|
|
243 |
characters, |
|
|
244 |
l, |
|
|
245 |
replace = TRUE |
|
|
246 |
), |
|
|
247 |
collapse='' |
|
|
248 |
) |
|
|
249 |
# if they've passed a disallowed vector, let's check none of the parts of the |
|
|
250 |
# string contain it |
|
|
251 |
if(!is.null(disallowed)) { |
|
|
252 |
# loop over elements of disallowed |
|
|
253 |
for(not.allowed in disallowed) { |
|
|
254 |
# find matches of the forbidden string |
|
|
255 |
not.allowed.matches <- gregexpr(not.allowed, random.string) |
|
|
256 |
# if some matches are found... |
|
|
257 |
while(not.allowed.matches[[1]][1] != -1) { |
|
|
258 |
# ...loop over them, getting rid of one at a time |
|
|
259 |
for(i in 1:length(not.allowed.matches[[1]])) { |
|
|
260 |
substring( |
|
|
261 |
random.string, |
|
|
262 |
# start at the match point |
|
|
263 |
not.allowed.matches[[1]][i], |
|
|
264 |
# end at match point plus match length |
|
|
265 |
not.allowed.matches[[1]][i] + attr(not.allowed.matches[[1]], 'match.length')[i] |
|
|
266 |
) <- |
|
|
267 |
# and what better to replace them with than a string generated at |
|
|
268 |
# random with this very function! |
|
|
269 |
randomString( |
|
|
270 |
attr(not.allowed.matches[[1]], 'match.length')[i], |
|
|
271 |
characters, |
|
|
272 |
disallowed |
|
|
273 |
) |
|
|
274 |
} |
|
|
275 |
# and then perform the test again to make sure we didn't introduce |
|
|
276 |
# any unexpected disallowed patterns with the replacements... |
|
|
277 |
not.allowed.matches <- gregexpr(not.allowed, random.string) |
|
|
278 |
} |
|
|
279 |
} |
|
|
280 |
} |
|
|
281 |
# return the random string |
|
|
282 |
random.string |
|
|
283 |
} |
|
|
284 |
|
|
|
285 |
randomStrings <- function(n, l, characters = letters, disallowed = NULL) { |
|
|
286 |
# Generate n random strings; wrapper for the randomString function. |
|
|
287 |
# |
|
|
288 |
# Args: |
|
|
289 |
# n: Number of random strings to generate |
|
|
290 |
# ...: For other arguments, see randomString |
|
|
291 |
# |
|
|
292 |
# Returns: |
|
|
293 |
# n random strings with the specified properties |
|
|
294 |
replicate(n, randomString(l, characters, disallowed)) |
|
|
295 |
} |
|
|
296 |
|
|
|
297 |
################################################################################ |
|
|
298 |
### FACTORS ################################################################## |
|
|
299 |
################################################################################ |
|
|
300 |
|
|
|
301 |
concatFactors <- function(...) { |
|
|
302 |
# Takes some factors and concatenates them. R coerces factors to integers if |
|
|
303 |
# you don't convert them to character vectors at the intermediate stage, so |
|
|
304 |
# this saves typing that every time. |
|
|
305 |
# |
|
|
306 |
# Args: |
|
|
307 |
# ...: Some factors |
|
|
308 |
# |
|
|
309 |
# Returns: |
|
|
310 |
# A big factor. |
|
|
311 |
factor(unlist(lapply(list(...), FUN=as.character))) |
|
|
312 |
} |
|
|
313 |
|
|
|
314 |
factorChooseFirst <- function(x, first) { |
|
|
315 |
# Move a chosen level to be the first in a factor. |
|
|
316 |
# |
|
|
317 |
# Args: |
|
|
318 |
# x: A factor. |
|
|
319 |
# first: The level in the factor you want to be first. |
|
|
320 |
# |
|
|
321 |
# Returns: |
|
|
322 |
# A factor with the first level redefined to be the one specified. |
|
|
323 |
|
|
|
324 |
# if the level requested to be first isn't present, this ain't gonna work |
|
|
325 |
if (!(first %in% levels(x))) { |
|
|
326 |
stop(paste("Error: the level", first, "doesn't appear in the factor", |
|
|
327 |
deparse(substitute(x)))) |
|
|
328 |
} |
|
|
329 |
factor(x, levels = c(first, levels(x)[levels(x) != first])) |
|
|
330 |
} |
|
|
331 |
|
|
|
332 |
factorNAfix <- function(x, NAval = 'NA', force = FALSE) { |
|
|
333 |
# Make NA values in a factor into their own level. |
|
|
334 |
# |
|
|
335 |
# Args: |
|
|
336 |
# x: A factor. |
|
|
337 |
# NAval: The value to replace NAs with. The string 'NA' by default. |
|
|
338 |
# force: Whether to force the operation even if there aren't any NAs in |
|
|
339 |
# the passed factor. |
|
|
340 |
# |
|
|
341 |
# Returns: |
|
|
342 |
# A factor with NAs replaced by a specific level. |
|
|
343 |
|
|
|
344 |
# if it's forced, or if it's not but there are NAs present... |
|
|
345 |
if(force | sum(is.na(x)) > 0) { |
|
|
346 |
levels(x) <- c(levels(x), NAval) |
|
|
347 |
x[is.na(x)] <- NAval |
|
|
348 |
} |
|
|
349 |
x |
|
|
350 |
} |
|
|
351 |
|
|
|
352 |
factorOrderedLevels <- function(x, ...) { |
|
|
353 |
# Create a factor with levels in the order of the |
|
|
354 |
# |
|
|
355 |
# Args: |
|
|
356 |
# x: A list or vector. Works if the list's elements are themselves |
|
|
357 |
# lists or vectors. |
|
|
358 |
# |
|
|
359 |
# Returns: |
|
|
360 |
# TRUE or FALSE, depending. |
|
|
361 |
if(length(x) == length(unique(x))) { |
|
|
362 |
return(factor(x, levels = x)) |
|
|
363 |
} else { |
|
|
364 |
stop('Elements of x must all be unique.') |
|
|
365 |
} |
|
|
366 |
} |
|
|
367 |
|
|
|
368 |
allSame <- function(x) { |
|
|
369 |
# Work out whether all elements of a list or vector are the same. |
|
|
370 |
# |
|
|
371 |
# Args: |
|
|
372 |
# x: A list or vector. Works if the list's elements are themselves |
|
|
373 |
# lists or vectors. |
|
|
374 |
# |
|
|
375 |
# Returns: |
|
|
376 |
# TRUE or FALSE, depending. |
|
|
377 |
length(unique(x)) == 1 |
|
|
378 |
} |
|
|
379 |
|
|
|
380 |
allSameLength <- function(x) { |
|
|
381 |
# Work out whether all elements of a list are the same length. |
|
|
382 |
# |
|
|
383 |
# Args: |
|
|
384 |
# x: A list. |
|
|
385 |
# |
|
|
386 |
# Returns: |
|
|
387 |
# TRUE or FALSE, depending. |
|
|
388 |
length(unique(lapply(x, length))) == 1 |
|
|
389 |
} |
|
|
390 |
|
|
|
391 |
################################################################################ |
|
|
392 |
### LISTS #################################################################### |
|
|
393 |
################################################################################ |
|
|
394 |
|
|
|
395 |
list2dataframe <- function(x) { |
|
|
396 |
# Simple wrapper to very naively turn a list into a data frame. If your list |
|
|
397 |
# elements have different numbers of elements, this will go wrong! |
|
|
398 |
# |
|
|
399 |
# Args: |
|
|
400 |
# x: A list. |
|
|
401 |
# |
|
|
402 |
# Returns: |
|
|
403 |
# A data frame made from the passed list. |
|
|
404 |
data.frame(matrix(unlist(x), ncol = length(x[[1]]), byrow = TRUE)) |
|
|
405 |
} |
|
|
406 |
|
|
|
407 |
################################################################################ |
|
|
408 |
### FILES #################################################################### |
|
|
409 |
################################################################################ |
|
|
410 |
|
|
|
411 |
list.dirs <- function(path=".", pattern=NULL, all.dirs=FALSE, |
|
|
412 |
ignore.case=FALSE) { |
|
|
413 |
# Lists the directories present within a path. |
|
|
414 |
# Credit: http://stackoverflow.com/questions/4749783 |
|
|
415 |
# |
|
|
416 |
# Args: |
|
|
417 |
# See list.files |
|
|
418 |
# |
|
|
419 |
# Returns: |
|
|
420 |
# A vector of directories within the path being searched. |
|
|
421 |
all <- list.files(path, pattern, all.dirs, |
|
|
422 |
full.names=TRUE, recursive=FALSE, ignore.case) |
|
|
423 |
all[file.info(all)$isdir] |
|
|
424 |
} |
|
|
425 |
|
|
|
426 |
writeTablePlus <- function(data, filename, comment = '', sep = '\t', |
|
|
427 |
comment.char = '#', row.names = FALSE, |
|
|
428 |
col.names = TRUE, ...) { |
|
|
429 |
# A wrapper for the write.table function which adds a comment of your choice |
|
|
430 |
# at the top of the file. |
|
|
431 |
# |
|
|
432 |
# Args: |
|
|
433 |
# filename: The name of the file to be written. |
|
|
434 |
# comment: The comment to be added at the top of the file. |
|
|
435 |
# sep: The separator for the data, tab by default. |
|
|
436 |
# comment.char: The character denoting comments, # by default. |
|
|
437 |
# row.names: FALSE by default, because who wants row names? |
|
|
438 |
# col.names: TRUE by default, because everyone wants column names! |
|
|
439 |
# ... : Allows arbitrary extra arguments relevant to write.table. |
|
|
440 |
# |
|
|
441 |
# Returns: |
|
|
442 |
# Nothing! |
|
|
443 |
|
|
|
444 |
f <- file(filename, open="wt") # open a connection to the file |
|
|
445 |
# if there's a comment, write that first |
|
|
446 |
if(nchar(comment) > 0) { |
|
|
447 |
# wrap the comment at 80 lines prefixed the comment character plus space |
|
|
448 |
comment <- |
|
|
449 |
strwrap(comment, width = 80, prefix = paste(comment.char, ' ', sep = '')) |
|
|
450 |
writeLines(comment, f) |
|
|
451 |
} |
|
|
452 |
write.table( |
|
|
453 |
data, f, sep=sep, row.names = row.names, col.names = col.names, ... |
|
|
454 |
) |
|
|
455 |
close(f) |
|
|
456 |
} |
|
|
457 |
|
|
|
458 |
readTablePlus <- function(filename, sep='\t', comment.char='#', header=TRUE, |
|
|
459 |
...) { |
|
|
460 |
# Handy wrapper for the read.table function to make it compatible with the |
|
|
461 |
# writeTablePlus function with its default options. |
|
|
462 |
# |
|
|
463 |
# Args: |
|
|
464 |
# filename: The name of the file to read. If it's a vector, the function |
|
|
465 |
# read the list of files provided and concatenate them into a |
|
|
466 |
# single data frame. |
|
|
467 |
if(length(filename) > 1) { |
|
|
468 |
do.call(rbind, lapply(filename, readTablePlus)) |
|
|
469 |
} else { |
|
|
470 |
read.table(filename, sep=sep, comment.char=comment.char, header=header, ...) |
|
|
471 |
} |
|
|
472 |
} |
|
|
473 |
|
|
|
474 |
readXlsxWriteTables <- function(filename, output.ext = 'tsv', sheet.names = NA, |
|
|
475 |
...) { |
|
|
476 |
# Write a multi-workbook xlsx file to a series of text files. Currently not |
|
|
477 |
# that useful, as neither read.xlsx nor read.xlsx2 does a very good job of |
|
|
478 |
# identifying column types, and you end up with a TSV composed of strings |
|
|
479 |
# |
|
|
480 |
# Args: |
|
|
481 |
# filename: The name of the Excel file to read. |
|
|
482 |
# output.ext: The extension of the output file(s). |
|
|
483 |
# sheet.names: The names of the sheets to be written out. Default NA will |
|
|
484 |
# write all sheets which are present. |
|
|
485 |
# ... : Allows arbitrary extra arguments to writeTablePlus |
|
|
486 |
|
|
|
487 |
# Require the xlsx library - not loaded by default as it's not often needed |
|
|
488 |
requirePlus('xlsx') |
|
|
489 |
|
|
|
490 |
# If no sheet names were provided... |
|
|
491 |
if(is.na(sheet.names)) { |
|
|
492 |
# ...get the sheet names by a few nested functions |
|
|
493 |
sheet.names <- names(getSheets(loadWorkbook(filename))) |
|
|
494 |
} |
|
|
495 |
|
|
|
496 |
base.filename <- justFilename(filename) |
|
|
497 |
|
|
|
498 |
# For the provided sheet names, go through and read them, and then write them |
|
|
499 |
for(sheet.name in sheet.names) { |
|
|
500 |
writeTablePlus( |
|
|
501 |
# read.xlsx2 is faster, and also assumes tabular data with a header, |
|
|
502 |
# probably the more likely use-case here |
|
|
503 |
read.xlsx2(filename, sheetName = sheet.name), |
|
|
504 |
# filename_sheetname.tsv |
|
|
505 |
paste0(base.filename, '_', sheet.name, '.', output.ext), |
|
|
506 |
... |
|
|
507 |
) |
|
|
508 |
} |
|
|
509 |
} |
|
|
510 |
|
|
|
511 |
justFilename <- function(x) { |
|
|
512 |
# Returns filenames without extensions. |
|
|
513 |
# |
|
|
514 |
# Args: |
|
|
515 |
# x: A character or vector of characters containing filenames, with or |
|
|
516 |
# without paths. |
|
|
517 |
# |
|
|
518 |
# Returns: |
|
|
519 |
# The string or vector with everything after and including a final full stop |
|
|
520 |
# removed. |
|
|
521 |
sapply(strsplit(basename(x),"\\."), |
|
|
522 |
function(x) paste(x[1:(length(x)-1)], |
|
|
523 |
collapse=".") |
|
|
524 |
) |
|
|
525 |
} |
|
|
526 |
|
|
|
527 |
fileExt <- function(x) { |
|
|
528 |
# Returns just extensions from filenames. |
|
|
529 |
# |
|
|
530 |
# Args: |
|
|
531 |
# x: A character or vector of characters containing filenames, with or |
|
|
532 |
# without paths. |
|
|
533 |
# |
|
|
534 |
# Returns: |
|
|
535 |
# Everything after a final full stop. |
|
|
536 |
|
|
|
537 |
# split the strings by full stops, and only take the final element |
|
|
538 |
extensions <- sapply(strsplit(basename(x),"\\."), |
|
|
539 |
function(x) tail(x, 1) |
|
|
540 |
) |
|
|
541 |
# where the extension is the same as the input filename, there is no extension |
|
|
542 |
extensions[extensions == x] <- '' |
|
|
543 |
|
|
|
544 |
extensions |
|
|
545 |
} |
|
|
546 |
|
|
|
547 |
suffixFilename <- function(x, suffix = '_1') { |
|
|
548 |
# Returns a filename with a suffix appended before its extension. |
|
|
549 |
# |
|
|
550 |
# Args: |
|
|
551 |
# x: A character or vector of characters containing filenames. |
|
|
552 |
# |
|
|
553 |
# Returns: |
|
|
554 |
# filename_suffix.ext |
|
|
555 |
paste0(justFilename(x), suffix, '.', fileExt(x)) |
|
|
556 |
} |
|
|
557 |
|
|
|
558 |
################################################################################ |
|
|
559 |
### MATHEMATICS ############################################################## |
|
|
560 |
################################################################################ |
|
|
561 |
|
|
|
562 |
floorPlus <- function(x, digits = 0) { |
|
|
563 |
# the function floor but with a "digits" option to floor not just to the |
|
|
564 |
# nearest integer |
|
|
565 |
# |
|
|
566 |
# Args: |
|
|
567 |
# x: vector to floor |
|
|
568 |
# digits: Number of decimal places to be used |
|
|
569 |
# |
|
|
570 |
# Returns: |
|
|
571 |
# integer/vector. |
|
|
572 |
floor(x*(10^digits)) / 10^digits |
|
|
573 |
} |
|
|
574 |
|
|
|
575 |
ceilPlus <- function(x, digits = 0) { |
|
|
576 |
# the function ceil but with a "digits" option to floor not just to the |
|
|
577 |
# nearest integer |
|
|
578 |
# |
|
|
579 |
# Args: |
|
|
580 |
# x: vector to floor |
|
|
581 |
# digits: Number of decimal places to be used |
|
|
582 |
# |
|
|
583 |
# Returns: |
|
|
584 |
# integer/vector. |
|
|
585 |
ceil(x*(10^digits)) / 10^digits |
|
|
586 |
} |
|
|
587 |
|
|
|
588 |
tri <- function(x) { |
|
|
589 |
# Calculates the xth triangular number. |
|
|
590 |
# |
|
|
591 |
# Args: |
|
|
592 |
# x: A number. |
|
|
593 |
# |
|
|
594 |
# Returns: |
|
|
595 |
# The xth triangular number. |
|
|
596 |
x * (x + 1) / 2 |
|
|
597 |
} |
|
|
598 |
|
|
|
599 |
trirt <- function(x) { |
|
|
600 |
# Calculates the triangular root of a number. |
|
|
601 |
# |
|
|
602 |
# Args: |
|
|
603 |
# x: A number. |
|
|
604 |
# |
|
|
605 |
# Returns: |
|
|
606 |
# Its triangular root. |
|
|
607 |
(sqrt(8*x + 1) - 1) / 2 |
|
|
608 |
} |
|
|
609 |
|
|
|
610 |
# A series of functions which allow arithmetic on quantities with uncertainty. |
|
|
611 |
# Create a quantity by passing values or vectors to unum(x, dx), and then add, |
|
|
612 |
# subtract, multiply or divide with the functions below. |
|
|
613 |
unum <- function(x, dx) { data.frame(x=x, dx=dx) } |
|
|
614 |
# Calculates the triangular root of a number. |
|
|
615 |
# |
|
|
616 |
# Args: |
|
|
617 |
# x: A number or vector of numbers. |
|
|
618 |
# dx: A number or vector of numbers representing the uncertainty on x. |
|
|
619 |
# |
|
|
620 |
# Returns: |
|
|
621 |
# A data frame with columns x and dx which can be used for further |
|
|
622 |
# operations. |
|
|
623 |
uadd <- function(a, b) { |
|
|
624 |
z <- a$x + b$x |
|
|
625 |
dz <- sqrt(a$dx^2 + b$dx^2) |
|
|
626 |
unum(z, dz) |
|
|
627 |
} |
|
|
628 |
usub <- function(a, b) { |
|
|
629 |
z <- a$x - b$x |
|
|
630 |
dz <- sqrt(a$dx^2 + b$dx^2) |
|
|
631 |
unum(z, dz) |
|
|
632 |
} |
|
|
633 |
umul <- function(a, b) { |
|
|
634 |
z <- a$x * b$x |
|
|
635 |
dz <- z * sqrt((a$dx/a$x)^2 + (b$dx/b$x)^2) |
|
|
636 |
unum(z, dz) |
|
|
637 |
} |
|
|
638 |
udiv <- function(a, b) { |
|
|
639 |
z <- a$x / b$x |
|
|
640 |
dz <- z * sqrt((a$dx/a$x)^2 + (b$dx/b$x)^2) |
|
|
641 |
unum(z, dz) |
|
|
642 |
} |
|
|
643 |
|
|
|
644 |
normalise <- function(x, FUN = sum) { |
|
|
645 |
# Returns a vector normalised by the function FUN, default being sum so the |
|
|
646 |
# vector would now sum to 1. Another example would be max, so the largest |
|
|
647 |
# value in x becomes 1. |
|
|
648 |
# |
|
|
649 |
# Args: |
|
|
650 |
# x: A vector. |
|
|
651 |
# FUN: A function which returns a single value when applied to a vector. |
|
|
652 |
# |
|
|
653 |
# Returns: |
|
|
654 |
# A vector, normalised appropriately. |
|
|
655 |
if(!is.function(FUN)) stop('Passed FUN is not a function') |
|
|
656 |
x / FUN(x) |
|
|
657 |
} |
|
|
658 |
|
|
|
659 |
minGt <- function(x, gt = 0) { |
|
|
660 |
# Return the minimum value of a vector greater than a value gt. |
|
|
661 |
# |
|
|
662 |
# Args: |
|
|
663 |
# x: A vector. |
|
|
664 |
# q: The value which this minimum value must be greater than, defaulting to |
|
|
665 |
# 0 (which returns the minimum positive value). |
|
|
666 |
# |
|
|
667 |
# Returns: |
|
|
668 |
# The minimum positive value, eg c(-1, 0, 2, 4) would return 2. |
|
|
669 |
min(x[x > gt]) |
|
|
670 |
} |
|
|
671 |
|
|
|
672 |
minPositive <- function(x) { |
|
|
673 |
# Return the minimum positive value of a vector. Wrapper for minGt. |
|
|
674 |
# |
|
|
675 |
# Args: |
|
|
676 |
# x: A vector. |
|
|
677 |
# Returns: |
|
|
678 |
# The minimum positive value, eg c(-1, 0, 2, 4) would return 2. |
|
|
679 |
minGt(x, 0) |
|
|
680 |
} |
|
|
681 |
|
|
|
682 |
################################################################################ |
|
|
683 |
### STATISTICS ############################################################### |
|
|
684 |
################################################################################ |
|
|
685 |
|
|
|
686 |
stdErr <- function(x) { sqrt(var(x)/length(x)) } |
|
|
687 |
# For a vector x, returns the standard error on the mean. |
|
|
688 |
# |
|
|
689 |
# Args: |
|
|
690 |
# x: A vector. |
|
|
691 |
# |
|
|
692 |
# Returns: |
|
|
693 |
# The standard error on the mean. |
|
|
694 |
|
|
|
695 |
cv <- function(x) { sd(x)/mean(x) } |
|
|
696 |
# For a vector x, returns the coefficient of variation. |
|
|
697 |
# |
|
|
698 |
# Args: |
|
|
699 |
# x: A vector. |
|
|
700 |
# |
|
|
701 |
# Returns: |
|
|
702 |
# The coefficient of variation. |
|
|
703 |
|
|
|
704 |
covar <- function(x) { |
|
|
705 |
# Wrapper function which returns the variance for a single-column vector and |
|
|
706 |
# a covariance matrix for a multi-column vector. |
|
|
707 |
# |
|
|
708 |
# Args: |
|
|
709 |
# x: Some data. |
|
|
710 |
# |
|
|
711 |
# Returns: |
|
|
712 |
# The covariance matrix. |
|
|
713 |
if(is.null(dim(x))) { |
|
|
714 |
return(var(x)) |
|
|
715 |
} else { |
|
|
716 |
return(cov(x)) |
|
|
717 |
} |
|
|
718 |
} |
|
|
719 |
|
|
|
720 |
popvar <- function(x, na.rm = FALSE) { |
|
|
721 |
# Calculates population variance instead of sample variance (which is the |
|
|
722 |
# default of the var() function in R). |
|
|
723 |
# |
|
|
724 |
# Args: |
|
|
725 |
# x: a vector of the population data. |
|
|
726 |
# na.rm: a logical value indicating whether NA values should be stripped |
|
|
727 |
# before the computation proceeds. |
|
|
728 |
# |
|
|
729 |
# Returns: |
|
|
730 |
# The population variance. |
|
|
731 |
if(na.rm) { |
|
|
732 |
x <- x[!is.na(x)] |
|
|
733 |
} else if(any(is.na(x))) { |
|
|
734 |
return(NA) |
|
|
735 |
} |
|
|
736 |
mean((x-mean(x))^2) |
|
|
737 |
} |
|
|
738 |
|
|
|
739 |
weightedMeanPlus <- function(x, w, na.rm = FALSE) { |
|
|
740 |
# Compute a weighted mean, where the na.rm argument ignores NA values in both |
|
|
741 |
# the values and their weights. (The default R function returns NA if any |
|
|
742 |
# weight is NA even with na.rm = TRUE.) |
|
|
743 |
# |
|
|
744 |
# Args: |
|
|
745 |
# x: an object containing the values whose weighted mean is to be |
|
|
746 |
# computed. |
|
|
747 |
# w: a numerical vector of weights the same length as x giving the |
|
|
748 |
# weights to use for elements of x. |
|
|
749 |
# na.rm: a logical value indicating whether NA values should be stripped |
|
|
750 |
# before the computation proceeds. |
|
|
751 |
# |
|
|
752 |
# Returns: |
|
|
753 |
# The weighted mean. |
|
|
754 |
if(na.rm) { |
|
|
755 |
not.na <- !(is.na(x) | is.na(w)) |
|
|
756 |
wm <- weighted.mean(x[not.na], w[not.na]) |
|
|
757 |
} else { |
|
|
758 |
wm <- weighted.mean(x, w) |
|
|
759 |
} |
|
|
760 |
wm |
|
|
761 |
} |
|
|
762 |
|
|
|
763 |
################################################################################ |
|
|
764 |
### MISCELLANEOUS ############################################################ |
|
|
765 |
################################################################################ |
|
|
766 |
|
|
|
767 |
NA2val <- function(x, val = 0) { |
|
|
768 |
# Wrapper to turn NAs in an object into a value of your choice. |
|
|
769 |
# |
|
|
770 |
# Args: |
|
|
771 |
# x: The object containing errant NA values. |
|
|
772 |
# val: The value to replace the NAs with, default 0. |
|
|
773 |
# |
|
|
774 |
# Returns: |
|
|
775 |
# The object with the NAs replaced appropriately. |
|
|
776 |
x[is.na(x)] <- val |
|
|
777 |
x |
|
|
778 |
} |
|
|
779 |
|
|
|
780 |
isExactlyNA <- function(x) { |
|
|
781 |
# Is an object NA, or is it another kind of object? Unlike is.na, this is not |
|
|
782 |
# a vector operation and doesn't return eg a vector with whether or not each |
|
|
783 |
# value is NA, it would simply return FALSE because the object isn't an NA. |
|
|
784 |
# This is to prevent warnings when performing if statements on things like |
|
|
785 |
# optional arguments where NA is the default, but a vector could be passed, |
|
|
786 |
# and the if statement then warns that only the first element was used. |
|
|
787 |
# |
|
|
788 |
# Args: |
|
|
789 |
# x: The object to be tested. |
|
|
790 |
# |
|
|
791 |
# Returns: |
|
|
792 |
# TRUE if the object is literally just NA; FALSE otherwise. |
|
|
793 |
|
|
|
794 |
# NA is of type logical, so in order to be a true NA, it must be... |
|
|
795 |
if(is.logical(x) & length(x) == 1) { |
|
|
796 |
# If so, is it NA? |
|
|
797 |
return(is.na(x)) |
|
|
798 |
} else { |
|
|
799 |
# Otherwise, return FALSE |
|
|
800 |
return(FALSE) |
|
|
801 |
} |
|
|
802 |
} |
|
|
803 |
|
|
|
804 |
firstElement <- function(x) { |
|
|
805 |
# Function for apply-ing to lists which will return the first element of a |
|
|
806 |
# list element |
|
|
807 |
# Args: |
|
|
808 |
# x: An object with elements. |
|
|
809 |
# |
|
|
810 |
# Returns: |
|
|
811 |
# The first element of that object. |
|
|
812 |
x[1] |
|
|
813 |
} |
|
|
814 |
|
|
|
815 |
NArm <- function(x) { |
|
|
816 |
# Remove NAs from a vector. |
|
|
817 |
x[!is.na(x)] |
|
|
818 |
} |
|
|
819 |
|
|
|
820 |
|
|
|
821 |
samplePlus <- function(x, ..., na.rm = TRUE, only.unique = FALSE) { |
|
|
822 |
# Extension of the sample function from base R with the option of only |
|
|
823 |
# sampling from non-missing values. |
|
|
824 |
# |
|
|
825 |
# Args: |
|
|
826 |
# x: A vector of one or more elements from which to choose. |
|
|
827 |
# ...: Other arguments to sample (ie size, replace, prob) |
|
|
828 |
# na.rm: Whether or not to remove NAs. Default TRUE since otherwise why are |
|
|
829 |
# you using this wrapper function? |
|
|
830 |
# only.unique: Sample from only the unique values of x? |
|
|
831 |
# |
|
|
832 |
# Returns: |
|
|
833 |
# A sample from the vector (see sample documentation), without NAs if na.rm |
|
|
834 |
# is set to TRUE, and only drawn from unique values of x is only.unique is |
|
|
835 |
# set to TRUE. |
|
|
836 |
|
|
|
837 |
if(na.rm) { |
|
|
838 |
x <- NArm(x) |
|
|
839 |
} |
|
|
840 |
if(only.unique) { |
|
|
841 |
x <- unique(x) |
|
|
842 |
} |
|
|
843 |
sample(x, ...) |
|
|
844 |
} |
|
|
845 |
|
|
|
846 |
permute <- |
|
|
847 |
function( |
|
|
848 |
# Randomly permute (some) elements of a vector or character string to create a |
|
|
849 |
# (slightly) randomised version of it. |
|
|
850 |
# |
|
|
851 |
# Args: |
|
|
852 |
x, |
|
|
853 |
# A vector or character string to have its contents permuted. |
|
|
854 |
frac = 1.0, |
|
|
855 |
# The fraction of the contents to be permuted, from 0 (no permutation) |
|
|
856 |
# to 1 (permute everything). |
|
|
857 |
n.permute = NA |
|
|
858 |
# The number of items to permute. Defaults to being calculated from frac |
|
|
859 |
# but can be specified manually too. Must be a multiple of 2, because |
|
|
860 |
# elements are swapped in pairs. |
|
|
861 |
# |
|
|
862 |
# Returns: |
|
|
863 |
# The vector or string with n.permute of its elements permuted. |
|
|
864 |
) { |
|
|
865 |
# if frac is not a fraction, throw an error |
|
|
866 |
if(frac < 0.0 | frac > 1.0) { |
|
|
867 |
stop(paste0('frac = ', frac,'; it must be between 0 and 1.')) |
|
|
868 |
} else if(!is.na(n.permute) & n.permute %% 2 != 0) { |
|
|
869 |
stop(paste0('n.permute = ', n.permute,'; it must be divisible by 2.')) |
|
|
870 |
} |
|
|
871 |
|
|
|
872 |
# if x is a character string, make it into a vector for processing and set a |
|
|
873 |
# reminder to put it back as a string before returning |
|
|
874 |
if(class(x) == 'character') { |
|
|
875 |
x.is.string <- TRUE |
|
|
876 |
x <- strsplit(x, '')[[1]] |
|
|
877 |
} else { |
|
|
878 |
x.is.string <- FALSE |
|
|
879 |
} |
|
|
880 |
|
|
|
881 |
# if n.permute was not provided, we can now calculate it |
|
|
882 |
if(is.na(n.permute)) { |
|
|
883 |
n.permute <- round(length(x)*frac / 2) * 2 # make sure it's a multiple of 2! |
|
|
884 |
} |
|
|
885 |
|
|
|
886 |
# if n.permute is longer than the vector... |
|
|
887 |
if(n.permute > length(x)) { |
|
|
888 |
stop(paste0('n.permute = ', n.permute,', which is greater than the length ', |
|
|
889 |
'of the string or vector provided, ', length(x))) |
|
|
890 |
} |
|
|
891 |
|
|
|
892 |
# Create a random sample for pairs of positions to swap between |
|
|
893 |
swapsies <- sample(1:length(x), n.permute) |
|
|
894 |
|
|
|
895 |
# take those swapping positions and move them around; reversing the indices in |
|
|
896 |
# the second part of the function implies that position 1 will swap with |
|
|
897 |
# position n, 2 with n-1, etc... |
|
|
898 |
x <- replace(x, swapsies, x[rev(swapsies)]) |
|
|
899 |
|
|
|
900 |
# if it was a string then reassemble it before returning |
|
|
901 |
if(x.is.string) { |
|
|
902 |
x <- paste(x, collapse='') |
|
|
903 |
} |
|
|
904 |
|
|
|
905 |
x |
|
|
906 |
} |
|
|
907 |
|
|
|
908 |
requirePlus <- function(..., install = TRUE, quietly = TRUE) { |
|
|
909 |
# Simply require a number of packages in the same command, and install them if |
|
|
910 |
# not present. |
|
|
911 |
# |
|
|
912 |
# Args: |
|
|
913 |
# packages: A vector of the names of the packages to be imported. |
|
|
914 |
# install: Logical indicating whether missing packages should be installed. |
|
|
915 |
# quietly: As with require, quietly suppresses messages. |
|
|
916 |
# |
|
|
917 |
# Returns: |
|
|
918 |
# Nothing (though warning and error messages are displayed on failure) |
|
|
919 |
|
|
|
920 |
package.list <- c(...) |
|
|
921 |
# if the install parameter is true, install missing packages |
|
|
922 |
if(install) { |
|
|
923 |
# Check for missing packages |
|
|
924 |
packages.present <- package.list %in% rownames(installed.packages()) |
|
|
925 |
# And, if there are any, install them |
|
|
926 |
if(any(!packages.present)) { |
|
|
927 |
message( |
|
|
928 |
paste('Installing missing packages', |
|
|
929 |
paste(package.list[!packages.present], collapse = ', ') |
|
|
930 |
) |
|
|
931 |
) |
|
|
932 |
install.packages(package.list[!packages.present]) |
|
|
933 |
} |
|
|
934 |
} |
|
|
935 |
# loop over packages, importing them |
|
|
936 |
require.success <- unlist( |
|
|
937 |
# suppress warnings, because we'll tell the user which packages failed later |
|
|
938 |
suppressWarnings( |
|
|
939 |
lapply(package.list, require, character.only = TRUE, quietly = quietly) |
|
|
940 |
) |
|
|
941 |
) |
|
|
942 |
|
|
|
943 |
# If at least some packages imported successfully and the user hasn't asked |
|
|
944 |
# for quietness... |
|
|
945 |
if(sum(require.success) > 0 & !quietly) { |
|
|
946 |
message( |
|
|
947 |
paste('Successfully imported packages', |
|
|
948 |
paste(package.list[require.success], collapse = ', ') |
|
|
949 |
) |
|
|
950 |
) |
|
|
951 |
} |
|
|
952 |
|
|
|
953 |
# If at least some packages failed, warn the user regardless of quietly arg |
|
|
954 |
if(sum(!require.success) > 0) { |
|
|
955 |
warning( |
|
|
956 |
paste('Failed to import packages', |
|
|
957 |
paste(package.list[!require.success], collapse = ', ') |
|
|
958 |
) |
|
|
959 |
) |
|
|
960 |
} |
|
|
961 |
} |
|
|
962 |
|
|
|
963 |
initParallel <- function(cores = NULL, backend = 'doMC') { |
|
|
964 |
# Wrapper to initialise parallel computing functionality. |
|
|
965 |
# |
|
|
966 |
# Args: |
|
|
967 |
# cores: The number of cores to use simultaneously. If absent, use the |
|
|
968 |
# default from the relevant backend. |
|
|
969 |
# backend: Which backend to use. Currently supports doMC and doParallel. |
|
|
970 |
# |
|
|
971 |
# Returns: |
|
|
972 |
# Nothing. |
|
|
973 |
|
|
|
974 |
if(backend == 'doMC') { |
|
|
975 |
requirePlus('doMC', 'foreach') |
|
|
976 |
registerDoMC(cores) |
|
|
977 |
} else if(backend == 'doParallel') { |
|
|
978 |
requirePlus('doParallel', 'foreach') |
|
|
979 |
cl <- makeCluster(cores) |
|
|
980 |
registerDoParallel(cl) |
|
|
981 |
return(cl) |
|
|
982 |
} else { |
|
|
983 |
stop('Unrecognised backend', backend) |
|
|
984 |
} |
|
|
985 |
} |
|
|
986 |
|
|
|
987 |
inRange <- |
|
|
988 |
function(x, rang, incl.end = rep(FALSE, length(rang)), na.false = FALSE) { |
|
|
989 |
# Returns a vector of booleans specifying whether values of x fall within the |
|
|
990 |
# range rang. |
|
|
991 |
# |
|
|
992 |
# Args: |
|
|
993 |
# x: A vector of numbers. |
|
|
994 |
# rang: A vector specifying the range within which they are permitted to |
|
|
995 |
# fall. It can be of any length; only the smallest and largest |
|
|
996 |
# values are used. Called 'rang' so as not to clash with the |
|
|
997 |
# 'range' function in base R. |
|
|
998 |
# incl.end: A vector of the same length as rang specifying whether a given |
|
|
999 |
# endpoint is included or excluded from the range. |
|
|
1000 |
# na.false: Return NA values as not in range? |
|
|
1001 |
# |
|
|
1002 |
# Returns: |
|
|
1003 |
# A boolean with the same length of x, TRUE if within the range specified, |
|
|
1004 |
# FALSE otherwise. |
|
|
1005 |
|
|
|
1006 |
# find the indices of the smallest and largest values of the vector |
|
|
1007 |
# (presumably usually there will only be two!) |
|
|
1008 |
i.min <- which.min(rang) |
|
|
1009 |
i.max <- which.max(rang) |
|
|
1010 |
|
|
|
1011 |
# find those which are bigger than the minimum value (including endpoint if |
|
|
1012 |
# specified)... |
|
|
1013 |
if(incl.end[i.min]) {x >= rang[i.min]} else {x > rang[i.min]} & |
|
|
1014 |
# ...and those smaller than the max... |
|
|
1015 |
if(incl.end[i.max]) {x <= rang[i.max]} else {x < rang[i.max]} & |
|
|
1016 |
# ...and, if applicable, set NA values to false? |
|
|
1017 |
if(na.false) {!is.na(x)} else {TRUE} |
|
|
1018 |
# ...and return it! |
|
|
1019 |
} |
|
|
1020 |
|
|
|
1021 |
logfileStart <- function(filename = default.logfile.name) { |
|
|
1022 |
# Wrapper for creating a new blank log file during script execution. |
|
|
1023 |
# NB This will silently overwrite existing files! |
|
|
1024 |
# |
|
|
1025 |
# Args: |
|
|
1026 |
# filename: The name of the file to create. |
|
|
1027 |
# |
|
|
1028 |
# Returns: |
|
|
1029 |
# Nothing. |
|
|
1030 |
# |
|
|
1031 |
# Globals: |
|
|
1032 |
# Creates a global variable called logfileName so that related functions |
|
|
1033 |
# know where to write to. |
|
|
1034 |
logfileName <<- filename |
|
|
1035 |
cat('', file = filename) |
|
|
1036 |
} |
|
|
1037 |
|
|
|
1038 |
logfileCat <- function(..., |
|
|
1039 |
newline = TRUE, sep = "", fill = FALSE, |
|
|
1040 |
filename = logfileName |
|
|
1041 |
) { |
|
|
1042 |
# Wrapper for adding an entry to a log file. |
|
|
1043 |
# |
|
|
1044 |
# Args: |
|
|
1045 |
# ... : Stuff to write to the file |
|
|
1046 |
# newline: Whether to start a new line after the entry. |
|
|
1047 |
# sep: Separator between objects to write. |
|
|
1048 |
# fill: The fill option for cat(). |
|
|
1049 |
# filename: The name of the file to write to; default being the global |
|
|
1050 |
# variable set by logfileStart. |
|
|
1051 |
# |
|
|
1052 |
# Returns: |
|
|
1053 |
# Nothing. |
|
|
1054 |
if(newline & !fill) append.me <- "\n" else append.me <- NULL |
|
|
1055 |
cat(..., append.me, file = filename, sep = sep, fill = fill, |
|
|
1056 |
append = TRUE) |
|
|
1057 |
} |
|
|
1058 |
|
|
|
1059 |
logfileEnd <- function() { |
|
|
1060 |
# Wrapper for blanking the existing logfileName such that no further entries |
|
|
1061 |
# are written to it given the default options for logfileCat. |
|
|
1062 |
# |
|
|
1063 |
# Args: |
|
|
1064 |
# None. |
|
|
1065 |
# |
|
|
1066 |
# Returns: |
|
|
1067 |
# Nothing. |
|
|
1068 |
# |
|
|
1069 |
# Globals: |
|
|
1070 |
# Sets logfileName to "". |
|
|
1071 |
logfileName <<- "" |
|
|
1072 |
} |
|
|
1073 |
|
|
|
1074 |
unixTimestamp <-function() { |
|
|
1075 |
# Quick function to generate the UNIX timestamp. |
|
|
1076 |
# |
|
|
1077 |
# Args: |
|
|
1078 |
# None. |
|
|
1079 |
# |
|
|
1080 |
# Returns: |
|
|
1081 |
# Time in whole seconds since the start of the Unix epoch (01/01/1970 UTC) |
|
|
1082 |
as.numeric(Sys.time()) |
|
|
1083 |
} |
|
|
1084 |
|
|
|
1085 |
getUserInput <- function(s, parse.fun = NULL, validate.fun = NULL, e = NULL) { |
|
|
1086 |
# Get input from the user typing at the terminal. |
|
|
1087 |
# |
|
|
1088 |
# Args: |
|
|
1089 |
# s: The question to present the user with. |
|
|
1090 |
# parse.fun: Optional function with which to parse the input string. |
|
|
1091 |
# validate.fun: Optional function with which to validate the input string. |
|
|
1092 |
# e: Error message to display if the (cleaned) value fails validation. |
|
|
1093 |
# |
|
|
1094 |
# Returns: |
|
|
1095 |
# A parsed, validated input value. |
|
|
1096 |
|
|
|
1097 |
# loop, to keep asking for input if there's a problem |
|
|
1098 |
repeat { |
|
|
1099 |
# get the user's input by asking them s |
|
|
1100 |
user.input <- readline(s) |
|
|
1101 |
# if a function to clean input has been specified, run it |
|
|
1102 |
if(!is.null(parse.fun)) { |
|
|
1103 |
user.input <- parse.fun(user.input) |
|
|
1104 |
} |
|
|
1105 |
# if a function to validate input has been specified |
|
|
1106 |
if(!is.null(validate.fun)) { |
|
|
1107 |
# if the input validates... |
|
|
1108 |
if(validate.fun(user.input)) { |
|
|
1109 |
# ...the pass it back |
|
|
1110 |
return(user.input) |
|
|
1111 |
} |
|
|
1112 |
} else { |
|
|
1113 |
# if there's no validation to perform, return it anyway |
|
|
1114 |
return(user.input) |
|
|
1115 |
} |
|
|
1116 |
# if we've got this far, validation must have failed...print an error |
|
|
1117 |
# message and have another try... |
|
|
1118 |
cat(e) |
|
|
1119 |
} |
|
|
1120 |
} |
|
|
1121 |
|
|
|
1122 |
getUserInputInteger <- function(s) { |
|
|
1123 |
# Wrapper function to use getUserInput to acquire an integer from the user. |
|
|
1124 |
# |
|
|
1125 |
# Args: |
|
|
1126 |
# s: The question to present the user with. |
|
|
1127 |
# |
|
|
1128 |
# Returns: |
|
|
1129 |
# An integer. |
|
|
1130 |
getUserInput(s, |
|
|
1131 |
parse.fun = function(x){ |
|
|
1132 |
suppressWarnings(as.integer(x)) |
|
|
1133 |
}, |
|
|
1134 |
validate.fun = function(x) { |
|
|
1135 |
ifelse(!is.na(x), TRUE, FALSE) |
|
|
1136 |
}, |
|
|
1137 |
e = 'Could not parse input as an integer.') |
|
|
1138 |
} |
|
|
1139 |
|
|
|
1140 |
|
|
|
1141 |
|
|
|
1142 |
handyTimer <- function(t = NA, numeric = TRUE) { |
|
|
1143 |
# Wrapper function for easy to read timing code. |
|
|
1144 |
# |
|
|
1145 |
# Args: |
|
|
1146 |
# t: The time. Pass nothing (or NA) and the function will return the current |
|
|
1147 |
# time, ie start the clock. Pass a numeric or time object (ie a |
|
|
1148 |
# previously stored start time), and the function returns the difference |
|
|
1149 |
# (ie a time you wanted to measure). |
|
|
1150 |
# numeric: Whether to convert your time into a simple numerical value. FALSE |
|
|
1151 |
# results in returning an R time or time difference object. |
|
|
1152 |
# |
|
|
1153 |
# Returns: |
|
|
1154 |
# Either the current time, or the time since t. |
|
|
1155 |
|
|
|
1156 |
# If t is NA, get current time |
|
|
1157 |
if(is.na(t)) { |
|
|
1158 |
t <- proc.time()['elapsed'] |
|
|
1159 |
# Otherwise, get time difference |
|
|
1160 |
} else { |
|
|
1161 |
t <- proc.time()['elapsed'] - t |
|
|
1162 |
} |
|
|
1163 |
|
|
|
1164 |
# If the user wanted a numerical answer, as.numeric it |
|
|
1165 |
if(numeric) { |
|
|
1166 |
t <- as.numeric(t) |
|
|
1167 |
} |
|
|
1168 |
|
|
|
1169 |
# Return t |
|
|
1170 |
t |
|
|
1171 |
} |
|
|
1172 |
|
|
|
1173 |
varName <- function(...) { |
|
|
1174 |
# Get the name of a variable as a string. Inspired by: |
|
|
1175 |
# http://stackoverflow.com/questions/14577412/ |
|
|
1176 |
# Args: |
|
|
1177 |
# x: A variable, eg myvar. |
|
|
1178 |
# |
|
|
1179 |
# Returns: |
|
|
1180 |
# The name of the variable, eg 'myvar'. |
|
|
1181 |
|
|
|
1182 |
do.call( |
|
|
1183 |
function(x) { deparse(substitute(x)) }, |
|
|
1184 |
... |
|
|
1185 |
|
|
|
1186 |
) |
|
|
1187 |
} |
|
|
1188 |
|
|
|
1189 |
varsToTable <- function(df, filename, index.cols = 1, ...) { |
|
|
1190 |
# Function to serialise variables for storage as a simple table in a text |
|
|
1191 |
# file. Takes a data frame df with columns (eg id1, id2, var1, var2) and, if |
|
|
1192 |
# a preexisting value is found with the same index.cols (eg c('id1', 'id2'), |
|
|
1193 |
# or 1:2 in this example), replaces the values (var1 and var2) or, if not, |
|
|
1194 |
# appends them, then writes the results to filename. |
|
|
1195 |
# |
|
|
1196 |
# TODO: Check that df has the same shape/column names(?) as the file. |
|
|
1197 |
# TODO: Check for repeated indices in the df provided. |
|
|
1198 |
# |
|
|
1199 |
# Args: |
|
|
1200 |
# df: A data frame of indices and values to store. |
|
|
1201 |
# filename: A file in which to store them. |
|
|
1202 |
# index.cols: Which columns to use as indices, to find and replace existing |
|
|
1203 |
# values with the same index. |
|
|
1204 |
# ...: Extra arguments for writeTablePlus. |
|
|
1205 |
# |
|
|
1206 |
# Returns: |
|
|
1207 |
# Nothing, just writes the data to file. |
|
|
1208 |
if(file.exists(filename)) { |
|
|
1209 |
vars.table <- readTablePlus(filename) |
|
|
1210 |
# First, append our data frame to the existing table |
|
|
1211 |
vars.table <- rbind(vars.table, df) |
|
|
1212 |
|
|
|
1213 |
# Find the duplicate values of just the index columns, searching from the |
|
|
1214 |
# end because we want to keep the newer values we just rbind-ed in that |
|
|
1215 |
# case, and only keep unique ones. |
|
|
1216 |
vars.table <- |
|
|
1217 |
vars.table[!(duplicated(vars.table[, index.cols], fromLast = TRUE)), ] |
|
|
1218 |
|
|
|
1219 |
} else { |
|
|
1220 |
# If there's no existing file, just create a fresh data frame |
|
|
1221 |
vars.table <- df |
|
|
1222 |
} |
|
|
1223 |
|
|
|
1224 |
# Write the resulting data frame to a file of the given name |
|
|
1225 |
writeTablePlus(vars.table, filename, ...) |
|
|
1226 |
} |