--- a +++ b/lib/handy.R @@ -0,0 +1,1226 @@ +################################################################################ +### VARIABLE DEFINITION ###################################################### +################################################################################ +default.logfile.name <- 'log.txt' + +################################################################################ +### DATA FRAMES ############################################################## +################################################################################ + +sample.df <- function(df, size = 1, replace = FALSE, prob = NULL) { + # Samples rows from a data frame. + # + # Args: + # df: A data frame. + # size, replace, prob: Arguments from the sample function. + # + # Returns: + # size rows from this data frame (default 1), with or without replacement + # and with an optional probability vector. + df[sample(nrow(df), size, replace, prob), , drop = FALSE] +} + +bootstrapSampleDf <- function(df) { + # Quick wrapper function for simple bootstrap sampling of a data frame. + # + # Args: + # df: A data frame. + # + # Returns: + # A data frame with the same number of rows as the original, but randomly + # sampled with replacement. + sample.df(df, size = nrow(df), replace = TRUE) +} + +withoutCols <- function(df, cols) { + # Returns a vector to allow certain columns to be excluded from a + # data frame. The case where the columns being excluded are referred to + # numerically is trivial, but is included as well for generality. + # + # Args: + # df: a data frame. + # cols: the columns to be excluded, as a vector or scalar of number(s) or + # name(s). + # + # Returns: + # In the text case, a vector of booleans; TRUE for columns to include. + # In the numerical case, R understands df[-3,], so just minus the input. + if(is.character(cols)) { return(!(names(df) %in% cols)) } + else { return(-cols) } +} + +explodeByCol <- function(df, cols, sep=',', regex = NULL, + fixed = TRUE) { + # If your data frame contains multiple values in a single column, this splits + # multiple values across different rows, using either a separator character or + # a regular expression. + # + # Args: + # df: a data frame. + # cols: one or more column(s) to explode by. + # sep: the separator of the multiple values. + # regex: a regular expression which matches the values you're looking for; + # overrides sep. + # fixed: for strsplit, this variable determines whether the string passed + # in sep is fixed (TRUE) or a regular expression (FALSE). + # + # Returns: + # A data frame with new rows, one for each value in the exploded column. + + # data frames fairly often come in with 'character' columns which are factors, + # and these string-based functions can't handle that, so convert with a + # warning + + # instantiate a list to contain the exploded output + exploded <- list() + n.exploded <- list() + + for(col in cols) { + if(is.factor(df[, col])) { + warning( + paste0('Column ', col, ' is a factor, and has been coerced ', + 'to a character for exploding.') + ) + df[, col] <- as.character(df[, col]) + } else if(!is.character(df[, col])) { + # if it's not character data, it won't work, so pass an error + stop( + paste0('Column ', col, ' passed to explodeByType should be character ', + 'data; it is of class ', class(df[, col]), '.' + ) + ) + } + # if regex is NULL, use the separator provided + if(is.null(regex)) { + exploded[col] <- list(strsplit(df[, col], sep, fixed = fixed)) + # otherwise, use a regular expression to split the column + } else { + exploded[col] <- list(regmatches(df[, col], gregexpr(regex, df[, col]))) + } + # how many of each row should I create? ie 1,1,2,1,0 + n.exploded[[col]] <- sapply(exploded[[col]], length) + } + + # check the n.exploded values are the same for all columns + if(!allSame(n.exploded)) { + stop( + paste0('The columns provided have inconsistent numbers of elements ', + 'after exploding.' + ) + ) + } + # turn the first element of n.exploded into a list of data frame row indices, + # ie 1,2,3,3,4 + n.exploded.rows <- rep(1:length(n.exploded[[cols[1]]]), n.exploded[[cols[1]]]) + + # take the data frame and repeat rows the relevant number of times + df <- df[n.exploded.rows, ] + # fill its exploded column(s) with the appropriate values + for(col in cols) { + df[, col] <- unlist(exploded[[col]]) + } + df +} + +################################################################################ +### CHARACTERS ############################################################### +################################################################################ + +removeWhitespace <- function(x) { gsub("\\s","", x) } +# For a character or vector of characters x, removes all spaces and line +# breaks. +# +# Args: +# x: A character or vector of characters. +# +# Returns: +# The character with whitespace removed. + +pastePlus <- function(..., sep=" ", collapse = NULL, recycleZeroLength = TRUE) { + # Version of the base R paste function which optionally returns nothing if any + # of the ...s being concatenated have zero length. (Default behaviour is to + # recycle them to "".) + # + # Args: + # ..., sep, collapse: as paste in base R + # ignoreZeroLength: + # + # Returns: + # If any of the passed objects has zero length, NULL; otherwise, the + # result of the paste function. + if(!recycleZeroLength & + any(lapply(list(...), length) == 0)) { + return(NULL); + } + paste(..., sep = sep, collapse = collapse) +} + +paste0Plus <- function(..., collapse = NULL, recycleZeroLength = TRUE) { + pastePlus(..., sep="", collapse = collapse, + recycleZeroLength = recycleZeroLength) +} + +strPos <- function(..., fixed = TRUE) { + # Wrapper function which returns the positions of the first occurrence of a + # pattern in some text. Simplifies regexpr which returns a variety of things + # other than simply the position. Defaults to fixed rather than regular + # expression searching. + # + # Args: + # ...: see grep in base R; usually (pattern, text) + # fixed: Logical. If true, pattern is matched as-is. + # + # Returns: + # The position of the first occurrence of the pattern in the text. + regexpr(..., fixed = fixed)[1] +} + +startsWithAny <- function(x, prefixes) { + # Function extending startsWith for use with a vector of many prefixes. + # + # Args: + # x: Vector of characters whose starts will be examined. + # prefixes: Vector of characters which may be those starts. + # + # Returns: + # A logical vector denoting whether a given string in x starts with any + # of the prefixes provided. + apply( + # sapply startsWith over all prefixes, giving a table of logicals + sapply( + prefixes, + function(prefix) { + startsWith(x, prefix) + } + ), + # ...then, apply a massive logical 'or' over the rows of that table + MARGIN = 1, FUN = any + ) +} + +textAfter <- function(x, prefix) { + # Find and return text from strings starting with a prefix, after that prefix. + # + # Args: + # x: Vector of characters to examine. + # prefix: Single character string to search for and then discard where + # present. Accepts a vector as startsWith, but this would be a + # slightly strange use-case. + # + # Returns: + # A character vector of pieces of text which occur after the prefix + # specified. eg textAfter(c('a1', 'a2', 'b1'), 'a') would return + # c('1', '2'). + i <- startsWith(x, prefix) + substr(x[i], nchar(prefix) + 1, nchar(x[i])) +} + +randomString <- function(l, characters = letters, disallowed = NULL) { + # Generate a random string, with optional excision of disallowed sequences. + # + # Args: + # l: The length of the string to generate in number of components + # (usually single characters, see below) + # characters: Either a string eg 'Argh' which will be split into individual + # characters to act as string components, or a vector of components; + # no check is made so these can be multi-character + # disallowed: A vector of disallowed sequences. Defaults to NULL, which + # lets anything through. + # + # Returns: + # A string of length l picked from the characters provided, without any + # disallowed strings. + + + # if passed a single-element vector, it's almost certain they don't want a + # single string repeated 'randomly' over and over, so split it into characters + if(length(characters == 1)) { + characters <- unlist(strsplit(characters, '')) + } + # generate a random string by sampling from characters + random.string <- paste0( + sample( + characters, + l, + replace = TRUE + ), + collapse='' + ) + # if they've passed a disallowed vector, let's check none of the parts of the + # string contain it + if(!is.null(disallowed)) { + # loop over elements of disallowed + for(not.allowed in disallowed) { + # find matches of the forbidden string + not.allowed.matches <- gregexpr(not.allowed, random.string) + # if some matches are found... + while(not.allowed.matches[[1]][1] != -1) { + # ...loop over them, getting rid of one at a time + for(i in 1:length(not.allowed.matches[[1]])) { + substring( + random.string, + # start at the match point + not.allowed.matches[[1]][i], + # end at match point plus match length + not.allowed.matches[[1]][i] + attr(not.allowed.matches[[1]], 'match.length')[i] + ) <- + # and what better to replace them with than a string generated at + # random with this very function! + randomString( + attr(not.allowed.matches[[1]], 'match.length')[i], + characters, + disallowed + ) + } + # and then perform the test again to make sure we didn't introduce + # any unexpected disallowed patterns with the replacements... + not.allowed.matches <- gregexpr(not.allowed, random.string) + } + } + } + # return the random string + random.string +} + +randomStrings <- function(n, l, characters = letters, disallowed = NULL) { + # Generate n random strings; wrapper for the randomString function. + # + # Args: + # n: Number of random strings to generate + # ...: For other arguments, see randomString + # + # Returns: + # n random strings with the specified properties + replicate(n, randomString(l, characters, disallowed)) +} + +################################################################################ +### FACTORS ################################################################## +################################################################################ + +concatFactors <- function(...) { + # Takes some factors and concatenates them. R coerces factors to integers if + # you don't convert them to character vectors at the intermediate stage, so + # this saves typing that every time. + # + # Args: + # ...: Some factors + # + # Returns: + # A big factor. + factor(unlist(lapply(list(...), FUN=as.character))) +} + +factorChooseFirst <- function(x, first) { + # Move a chosen level to be the first in a factor. + # + # Args: + # x: A factor. + # first: The level in the factor you want to be first. + # + # Returns: + # A factor with the first level redefined to be the one specified. + + # if the level requested to be first isn't present, this ain't gonna work + if (!(first %in% levels(x))) { + stop(paste("Error: the level", first, "doesn't appear in the factor", + deparse(substitute(x)))) + } + factor(x, levels = c(first, levels(x)[levels(x) != first])) +} + +factorNAfix <- function(x, NAval = 'NA', force = FALSE) { + # Make NA values in a factor into their own level. + # + # Args: + # x: A factor. + # NAval: The value to replace NAs with. The string 'NA' by default. + # force: Whether to force the operation even if there aren't any NAs in + # the passed factor. + # + # Returns: + # A factor with NAs replaced by a specific level. + + # if it's forced, or if it's not but there are NAs present... + if(force | sum(is.na(x)) > 0) { + levels(x) <- c(levels(x), NAval) + x[is.na(x)] <- NAval + } + x +} + +factorOrderedLevels <- function(x, ...) { + # Create a factor with levels in the order of the + # + # Args: + # x: A list or vector. Works if the list's elements are themselves + # lists or vectors. + # + # Returns: + # TRUE or FALSE, depending. + if(length(x) == length(unique(x))) { + return(factor(x, levels = x)) + } else { + stop('Elements of x must all be unique.') + } +} + +allSame <- function(x) { + # Work out whether all elements of a list or vector are the same. + # + # Args: + # x: A list or vector. Works if the list's elements are themselves + # lists or vectors. + # + # Returns: + # TRUE or FALSE, depending. + length(unique(x)) == 1 +} + +allSameLength <- function(x) { + # Work out whether all elements of a list are the same length. + # + # Args: + # x: A list. + # + # Returns: + # TRUE or FALSE, depending. + length(unique(lapply(x, length))) == 1 +} + +################################################################################ +### LISTS #################################################################### +################################################################################ + +list2dataframe <- function(x) { + # Simple wrapper to very naively turn a list into a data frame. If your list + # elements have different numbers of elements, this will go wrong! + # + # Args: + # x: A list. + # + # Returns: + # A data frame made from the passed list. + data.frame(matrix(unlist(x), ncol = length(x[[1]]), byrow = TRUE)) +} + +################################################################################ +### FILES #################################################################### +################################################################################ + +list.dirs <- function(path=".", pattern=NULL, all.dirs=FALSE, + ignore.case=FALSE) { + # Lists the directories present within a path. + # Credit: http://stackoverflow.com/questions/4749783 + # + # Args: + # See list.files + # + # Returns: + # A vector of directories within the path being searched. + all <- list.files(path, pattern, all.dirs, + full.names=TRUE, recursive=FALSE, ignore.case) + all[file.info(all)$isdir] +} + +writeTablePlus <- function(data, filename, comment = '', sep = '\t', + comment.char = '#', row.names = FALSE, + col.names = TRUE, ...) { + # A wrapper for the write.table function which adds a comment of your choice + # at the top of the file. + # + # Args: + # filename: The name of the file to be written. + # comment: The comment to be added at the top of the file. + # sep: The separator for the data, tab by default. + # comment.char: The character denoting comments, # by default. + # row.names: FALSE by default, because who wants row names? + # col.names: TRUE by default, because everyone wants column names! + # ... : Allows arbitrary extra arguments relevant to write.table. + # + # Returns: + # Nothing! + + f <- file(filename, open="wt") # open a connection to the file + # if there's a comment, write that first + if(nchar(comment) > 0) { + # wrap the comment at 80 lines prefixed the comment character plus space + comment <- + strwrap(comment, width = 80, prefix = paste(comment.char, ' ', sep = '')) + writeLines(comment, f) + } + write.table( + data, f, sep=sep, row.names = row.names, col.names = col.names, ... + ) + close(f) +} + +readTablePlus <- function(filename, sep='\t', comment.char='#', header=TRUE, + ...) { + # Handy wrapper for the read.table function to make it compatible with the + # writeTablePlus function with its default options. + # + # Args: + # filename: The name of the file to read. If it's a vector, the function + # read the list of files provided and concatenate them into a + # single data frame. + if(length(filename) > 1) { + do.call(rbind, lapply(filename, readTablePlus)) + } else { + read.table(filename, sep=sep, comment.char=comment.char, header=header, ...) + } +} + +readXlsxWriteTables <- function(filename, output.ext = 'tsv', sheet.names = NA, + ...) { + # Write a multi-workbook xlsx file to a series of text files. Currently not + # that useful, as neither read.xlsx nor read.xlsx2 does a very good job of + # identifying column types, and you end up with a TSV composed of strings + # + # Args: + # filename: The name of the Excel file to read. + # output.ext: The extension of the output file(s). + # sheet.names: The names of the sheets to be written out. Default NA will + # write all sheets which are present. + # ... : Allows arbitrary extra arguments to writeTablePlus + + # Require the xlsx library - not loaded by default as it's not often needed + requirePlus('xlsx') + + # If no sheet names were provided... + if(is.na(sheet.names)) { + # ...get the sheet names by a few nested functions + sheet.names <- names(getSheets(loadWorkbook(filename))) + } + + base.filename <- justFilename(filename) + + # For the provided sheet names, go through and read them, and then write them + for(sheet.name in sheet.names) { + writeTablePlus( + # read.xlsx2 is faster, and also assumes tabular data with a header, + # probably the more likely use-case here + read.xlsx2(filename, sheetName = sheet.name), + # filename_sheetname.tsv + paste0(base.filename, '_', sheet.name, '.', output.ext), + ... + ) + } +} + +justFilename <- function(x) { + # Returns filenames without extensions. + # + # Args: + # x: A character or vector of characters containing filenames, with or + # without paths. + # + # Returns: + # The string or vector with everything after and including a final full stop + # removed. + sapply(strsplit(basename(x),"\\."), + function(x) paste(x[1:(length(x)-1)], + collapse=".") + ) +} + +fileExt <- function(x) { + # Returns just extensions from filenames. + # + # Args: + # x: A character or vector of characters containing filenames, with or + # without paths. + # + # Returns: + # Everything after a final full stop. + + # split the strings by full stops, and only take the final element + extensions <- sapply(strsplit(basename(x),"\\."), + function(x) tail(x, 1) + ) + # where the extension is the same as the input filename, there is no extension + extensions[extensions == x] <- '' + + extensions +} + +suffixFilename <- function(x, suffix = '_1') { + # Returns a filename with a suffix appended before its extension. + # + # Args: + # x: A character or vector of characters containing filenames. + # + # Returns: + # filename_suffix.ext + paste0(justFilename(x), suffix, '.', fileExt(x)) +} + +################################################################################ +### MATHEMATICS ############################################################## +################################################################################ + +floorPlus <- function(x, digits = 0) { + # the function floor but with a "digits" option to floor not just to the + # nearest integer + # + # Args: + # x: vector to floor + # digits: Number of decimal places to be used + # + # Returns: + # integer/vector. + floor(x*(10^digits)) / 10^digits +} + +ceilPlus <- function(x, digits = 0) { + # the function ceil but with a "digits" option to floor not just to the + # nearest integer + # + # Args: + # x: vector to floor + # digits: Number of decimal places to be used + # + # Returns: + # integer/vector. + ceil(x*(10^digits)) / 10^digits +} + +tri <- function(x) { + # Calculates the xth triangular number. + # + # Args: + # x: A number. + # + # Returns: + # The xth triangular number. + x * (x + 1) / 2 +} + +trirt <- function(x) { + # Calculates the triangular root of a number. + # + # Args: + # x: A number. + # + # Returns: + # Its triangular root. + (sqrt(8*x + 1) - 1) / 2 +} + +# A series of functions which allow arithmetic on quantities with uncertainty. +# Create a quantity by passing values or vectors to unum(x, dx), and then add, +# subtract, multiply or divide with the functions below. +unum <- function(x, dx) { data.frame(x=x, dx=dx) } + # Calculates the triangular root of a number. + # + # Args: + # x: A number or vector of numbers. + # dx: A number or vector of numbers representing the uncertainty on x. + # + # Returns: + # A data frame with columns x and dx which can be used for further + # operations. +uadd <- function(a, b) { + z <- a$x + b$x + dz <- sqrt(a$dx^2 + b$dx^2) + unum(z, dz) +} +usub <- function(a, b) { + z <- a$x - b$x + dz <- sqrt(a$dx^2 + b$dx^2) + unum(z, dz) +} +umul <- function(a, b) { + z <- a$x * b$x + dz <- z * sqrt((a$dx/a$x)^2 + (b$dx/b$x)^2) + unum(z, dz) +} +udiv <- function(a, b) { + z <- a$x / b$x + dz <- z * sqrt((a$dx/a$x)^2 + (b$dx/b$x)^2) + unum(z, dz) +} + +normalise <- function(x, FUN = sum) { + # Returns a vector normalised by the function FUN, default being sum so the + # vector would now sum to 1. Another example would be max, so the largest + # value in x becomes 1. + # + # Args: + # x: A vector. + # FUN: A function which returns a single value when applied to a vector. + # + # Returns: + # A vector, normalised appropriately. + if(!is.function(FUN)) stop('Passed FUN is not a function') + x / FUN(x) +} + +minGt <- function(x, gt = 0) { + # Return the minimum value of a vector greater than a value gt. + # + # Args: + # x: A vector. + # q: The value which this minimum value must be greater than, defaulting to + # 0 (which returns the minimum positive value). + # + # Returns: + # The minimum positive value, eg c(-1, 0, 2, 4) would return 2. + min(x[x > gt]) +} + +minPositive <- function(x) { + # Return the minimum positive value of a vector. Wrapper for minGt. + # + # Args: + # x: A vector. + # Returns: + # The minimum positive value, eg c(-1, 0, 2, 4) would return 2. + minGt(x, 0) +} + +################################################################################ +### STATISTICS ############################################################### +################################################################################ + +stdErr <- function(x) { sqrt(var(x)/length(x)) } + # For a vector x, returns the standard error on the mean. + # + # Args: + # x: A vector. + # + # Returns: + # The standard error on the mean. + +cv <- function(x) { sd(x)/mean(x) } + # For a vector x, returns the coefficient of variation. + # + # Args: + # x: A vector. + # + # Returns: + # The coefficient of variation. + +covar <- function(x) { + # Wrapper function which returns the variance for a single-column vector and + # a covariance matrix for a multi-column vector. + # + # Args: + # x: Some data. + # + # Returns: + # The covariance matrix. + if(is.null(dim(x))) { + return(var(x)) + } else { + return(cov(x)) + } +} + +popvar <- function(x, na.rm = FALSE) { + # Calculates population variance instead of sample variance (which is the + # default of the var() function in R). + # + # Args: + # x: a vector of the population data. + # na.rm: a logical value indicating whether NA values should be stripped + # before the computation proceeds. + # + # Returns: + # The population variance. + if(na.rm) { + x <- x[!is.na(x)] + } else if(any(is.na(x))) { + return(NA) + } + mean((x-mean(x))^2) +} + +weightedMeanPlus <- function(x, w, na.rm = FALSE) { + # Compute a weighted mean, where the na.rm argument ignores NA values in both + # the values and their weights. (The default R function returns NA if any + # weight is NA even with na.rm = TRUE.) + # + # Args: + # x: an object containing the values whose weighted mean is to be + # computed. + # w: a numerical vector of weights the same length as x giving the + # weights to use for elements of x. + # na.rm: a logical value indicating whether NA values should be stripped + # before the computation proceeds. + # + # Returns: + # The weighted mean. + if(na.rm) { + not.na <- !(is.na(x) | is.na(w)) + wm <- weighted.mean(x[not.na], w[not.na]) + } else { + wm <- weighted.mean(x, w) + } + wm +} + +################################################################################ +### MISCELLANEOUS ############################################################ +################################################################################ + +NA2val <- function(x, val = 0) { + # Wrapper to turn NAs in an object into a value of your choice. + # + # Args: + # x: The object containing errant NA values. + # val: The value to replace the NAs with, default 0. + # + # Returns: + # The object with the NAs replaced appropriately. + x[is.na(x)] <- val + x +} + +isExactlyNA <- function(x) { + # Is an object NA, or is it another kind of object? Unlike is.na, this is not + # a vector operation and doesn't return eg a vector with whether or not each + # value is NA, it would simply return FALSE because the object isn't an NA. + # This is to prevent warnings when performing if statements on things like + # optional arguments where NA is the default, but a vector could be passed, + # and the if statement then warns that only the first element was used. + # + # Args: + # x: The object to be tested. + # + # Returns: + # TRUE if the object is literally just NA; FALSE otherwise. + + # NA is of type logical, so in order to be a true NA, it must be... + if(is.logical(x) & length(x) == 1) { + # If so, is it NA? + return(is.na(x)) + } else { + # Otherwise, return FALSE + return(FALSE) + } +} + +firstElement <- function(x) { + # Function for apply-ing to lists which will return the first element of a + # list element + # Args: + # x: An object with elements. + # + # Returns: + # The first element of that object. + x[1] +} + +NArm <- function(x) { + # Remove NAs from a vector. + x[!is.na(x)] +} + + +samplePlus <- function(x, ..., na.rm = TRUE, only.unique = FALSE) { + # Extension of the sample function from base R with the option of only + # sampling from non-missing values. + # + # Args: + # x: A vector of one or more elements from which to choose. + # ...: Other arguments to sample (ie size, replace, prob) + # na.rm: Whether or not to remove NAs. Default TRUE since otherwise why are + # you using this wrapper function? + # only.unique: Sample from only the unique values of x? + # + # Returns: + # A sample from the vector (see sample documentation), without NAs if na.rm + # is set to TRUE, and only drawn from unique values of x is only.unique is + # set to TRUE. + + if(na.rm) { + x <- NArm(x) + } + if(only.unique) { + x <- unique(x) + } + sample(x, ...) +} + +permute <- +function( + # Randomly permute (some) elements of a vector or character string to create a + # (slightly) randomised version of it. + # + # Args: + x, + # A vector or character string to have its contents permuted. + frac = 1.0, + # The fraction of the contents to be permuted, from 0 (no permutation) + # to 1 (permute everything). + n.permute = NA + # The number of items to permute. Defaults to being calculated from frac + # but can be specified manually too. Must be a multiple of 2, because + # elements are swapped in pairs. + # + # Returns: + # The vector or string with n.permute of its elements permuted. +) { + # if frac is not a fraction, throw an error + if(frac < 0.0 | frac > 1.0) { + stop(paste0('frac = ', frac,'; it must be between 0 and 1.')) + } else if(!is.na(n.permute) & n.permute %% 2 != 0) { + stop(paste0('n.permute = ', n.permute,'; it must be divisible by 2.')) + } + + # if x is a character string, make it into a vector for processing and set a + # reminder to put it back as a string before returning + if(class(x) == 'character') { + x.is.string <- TRUE + x <- strsplit(x, '')[[1]] + } else { + x.is.string <- FALSE + } + + # if n.permute was not provided, we can now calculate it + if(is.na(n.permute)) { + n.permute <- round(length(x)*frac / 2) * 2 # make sure it's a multiple of 2! + } + + # if n.permute is longer than the vector... + if(n.permute > length(x)) { + stop(paste0('n.permute = ', n.permute,', which is greater than the length ', + 'of the string or vector provided, ', length(x))) + } + + # Create a random sample for pairs of positions to swap between + swapsies <- sample(1:length(x), n.permute) + + # take those swapping positions and move them around; reversing the indices in + # the second part of the function implies that position 1 will swap with + # position n, 2 with n-1, etc... + x <- replace(x, swapsies, x[rev(swapsies)]) + + # if it was a string then reassemble it before returning + if(x.is.string) { + x <- paste(x, collapse='') + } + + x +} + +requirePlus <- function(..., install = TRUE, quietly = TRUE) { + # Simply require a number of packages in the same command, and install them if + # not present. + # + # Args: + # packages: A vector of the names of the packages to be imported. + # install: Logical indicating whether missing packages should be installed. + # quietly: As with require, quietly suppresses messages. + # + # Returns: + # Nothing (though warning and error messages are displayed on failure) + + package.list <- c(...) + # if the install parameter is true, install missing packages + if(install) { + # Check for missing packages + packages.present <- package.list %in% rownames(installed.packages()) + # And, if there are any, install them + if(any(!packages.present)) { + message( + paste('Installing missing packages', + paste(package.list[!packages.present], collapse = ', ') + ) + ) + install.packages(package.list[!packages.present]) + } + } + # loop over packages, importing them + require.success <- unlist( + # suppress warnings, because we'll tell the user which packages failed later + suppressWarnings( + lapply(package.list, require, character.only = TRUE, quietly = quietly) + ) + ) + + # If at least some packages imported successfully and the user hasn't asked + # for quietness... + if(sum(require.success) > 0 & !quietly) { + message( + paste('Successfully imported packages', + paste(package.list[require.success], collapse = ', ') + ) + ) + } + + # If at least some packages failed, warn the user regardless of quietly arg + if(sum(!require.success) > 0) { + warning( + paste('Failed to import packages', + paste(package.list[!require.success], collapse = ', ') + ) + ) + } +} + +initParallel <- function(cores = NULL, backend = 'doMC') { + # Wrapper to initialise parallel computing functionality. + # + # Args: + # cores: The number of cores to use simultaneously. If absent, use the + # default from the relevant backend. + # backend: Which backend to use. Currently supports doMC and doParallel. + # + # Returns: + # Nothing. + + if(backend == 'doMC') { + requirePlus('doMC', 'foreach') + registerDoMC(cores) + } else if(backend == 'doParallel') { + requirePlus('doParallel', 'foreach') + cl <- makeCluster(cores) + registerDoParallel(cl) + return(cl) + } else { + stop('Unrecognised backend', backend) + } +} + +inRange <- + function(x, rang, incl.end = rep(FALSE, length(rang)), na.false = FALSE) { + # Returns a vector of booleans specifying whether values of x fall within the + # range rang. + # + # Args: + # x: A vector of numbers. + # rang: A vector specifying the range within which they are permitted to + # fall. It can be of any length; only the smallest and largest + # values are used. Called 'rang' so as not to clash with the + # 'range' function in base R. + # incl.end: A vector of the same length as rang specifying whether a given + # endpoint is included or excluded from the range. + # na.false: Return NA values as not in range? + # + # Returns: + # A boolean with the same length of x, TRUE if within the range specified, + # FALSE otherwise. + + # find the indices of the smallest and largest values of the vector + # (presumably usually there will only be two!) + i.min <- which.min(rang) + i.max <- which.max(rang) + + # find those which are bigger than the minimum value (including endpoint if + # specified)... + if(incl.end[i.min]) {x >= rang[i.min]} else {x > rang[i.min]} & + # ...and those smaller than the max... + if(incl.end[i.max]) {x <= rang[i.max]} else {x < rang[i.max]} & + # ...and, if applicable, set NA values to false? + if(na.false) {!is.na(x)} else {TRUE} + # ...and return it! +} + +logfileStart <- function(filename = default.logfile.name) { + # Wrapper for creating a new blank log file during script execution. + # NB This will silently overwrite existing files! + # + # Args: + # filename: The name of the file to create. + # + # Returns: + # Nothing. + # + # Globals: + # Creates a global variable called logfileName so that related functions + # know where to write to. + logfileName <<- filename + cat('', file = filename) +} + +logfileCat <- function(..., + newline = TRUE, sep = "", fill = FALSE, + filename = logfileName + ) { + # Wrapper for adding an entry to a log file. + # + # Args: + # ... : Stuff to write to the file + # newline: Whether to start a new line after the entry. + # sep: Separator between objects to write. + # fill: The fill option for cat(). + # filename: The name of the file to write to; default being the global + # variable set by logfileStart. + # + # Returns: + # Nothing. + if(newline & !fill) append.me <- "\n" else append.me <- NULL + cat(..., append.me, file = filename, sep = sep, fill = fill, + append = TRUE) +} + +logfileEnd <- function() { + # Wrapper for blanking the existing logfileName such that no further entries + # are written to it given the default options for logfileCat. + # + # Args: + # None. + # + # Returns: + # Nothing. + # + # Globals: + # Sets logfileName to "". + logfileName <<- "" +} + +unixTimestamp <-function() { + # Quick function to generate the UNIX timestamp. + # + # Args: + # None. + # + # Returns: + # Time in whole seconds since the start of the Unix epoch (01/01/1970 UTC) + as.numeric(Sys.time()) +} + +getUserInput <- function(s, parse.fun = NULL, validate.fun = NULL, e = NULL) { + # Get input from the user typing at the terminal. + # + # Args: + # s: The question to present the user with. + # parse.fun: Optional function with which to parse the input string. + # validate.fun: Optional function with which to validate the input string. + # e: Error message to display if the (cleaned) value fails validation. + # + # Returns: + # A parsed, validated input value. + + # loop, to keep asking for input if there's a problem + repeat { + # get the user's input by asking them s + user.input <- readline(s) + # if a function to clean input has been specified, run it + if(!is.null(parse.fun)) { + user.input <- parse.fun(user.input) + } + # if a function to validate input has been specified + if(!is.null(validate.fun)) { + # if the input validates... + if(validate.fun(user.input)) { + # ...the pass it back + return(user.input) + } + } else { + # if there's no validation to perform, return it anyway + return(user.input) + } + # if we've got this far, validation must have failed...print an error + # message and have another try... + cat(e) + } +} + +getUserInputInteger <- function(s) { + # Wrapper function to use getUserInput to acquire an integer from the user. + # + # Args: + # s: The question to present the user with. + # + # Returns: + # An integer. + getUserInput(s, + parse.fun = function(x){ + suppressWarnings(as.integer(x)) + }, + validate.fun = function(x) { + ifelse(!is.na(x), TRUE, FALSE) + }, + e = 'Could not parse input as an integer.') +} + + + +handyTimer <- function(t = NA, numeric = TRUE) { + # Wrapper function for easy to read timing code. + # + # Args: + # t: The time. Pass nothing (or NA) and the function will return the current + # time, ie start the clock. Pass a numeric or time object (ie a + # previously stored start time), and the function returns the difference + # (ie a time you wanted to measure). + # numeric: Whether to convert your time into a simple numerical value. FALSE + # results in returning an R time or time difference object. + # + # Returns: + # Either the current time, or the time since t. + + # If t is NA, get current time + if(is.na(t)) { + t <- proc.time()['elapsed'] + # Otherwise, get time difference + } else { + t <- proc.time()['elapsed'] - t + } + + # If the user wanted a numerical answer, as.numeric it + if(numeric) { + t <- as.numeric(t) + } + + # Return t + t +} + +varName <- function(...) { + # Get the name of a variable as a string. Inspired by: + # http://stackoverflow.com/questions/14577412/ + # Args: + # x: A variable, eg myvar. + # + # Returns: + # The name of the variable, eg 'myvar'. + + do.call( + function(x) { deparse(substitute(x)) }, + ... + + ) +} + +varsToTable <- function(df, filename, index.cols = 1, ...) { + # Function to serialise variables for storage as a simple table in a text + # file. Takes a data frame df with columns (eg id1, id2, var1, var2) and, if + # a preexisting value is found with the same index.cols (eg c('id1', 'id2'), + # or 1:2 in this example), replaces the values (var1 and var2) or, if not, + # appends them, then writes the results to filename. + # + # TODO: Check that df has the same shape/column names(?) as the file. + # TODO: Check for repeated indices in the df provided. + # + # Args: + # df: A data frame of indices and values to store. + # filename: A file in which to store them. + # index.cols: Which columns to use as indices, to find and replace existing + # values with the same index. + # ...: Extra arguments for writeTablePlus. + # + # Returns: + # Nothing, just writes the data to file. + if(file.exists(filename)) { + vars.table <- readTablePlus(filename) + # First, append our data frame to the existing table + vars.table <- rbind(vars.table, df) + + # Find the duplicate values of just the index columns, searching from the + # end because we want to keep the newer values we just rbind-ed in that + # case, and only keep unique ones. + vars.table <- + vars.table[!(duplicated(vars.table[, index.cols], fromLast = TRUE)), ] + + } else { + # If there's no existing file, just create a fresh data frame + vars.table <- df + } + + # Write the resulting data frame to a file of the given name + writeTablePlus(vars.table, filename, ...) +} \ No newline at end of file