--- a +++ b/man/pheatmapwh.Rd @@ -0,0 +1,268 @@ +\name{pheatmapwh} +\alias{pheatmapwh} +\title{A modification of \code{\link[pheatmap]{pheatmap}} from the pheatmap package by Raivo Kolde: draw clustered heatmaps.} +\usage{ +pheatmapwh(mat, color = colorRampPalette(rev(brewer.pal(n = 7, name = + "RdYlBu")))(100), kmeans_k = NA, breaks = NA, border_color = "grey60", + cellwidth = NA, cellheight = NA, scale = "none", cluster_rows = TRUE, + cluster_cols = TRUE, clustering_distance_rows = "euclidean", + clustering_distance_cols = "euclidean", clustering_method = "complete", + clustering_callback = identity2, cutree_rows = NA, cutree_cols = NA, + treeheight_row = ifelse(cluster_rows, 50, 0), + treeheight_col = ifelse(cluster_cols, 50, 0), legend = TRUE, + legend_breaks = NA, legend_labels = NA, annotation_row = NA, + annotation_col = NA, annotation = NA, annotation_colors = NA, + annotation_legend = TRUE, drop_levels = TRUE, show_rownames = T, + show_colnames = T, main = NA, fontsize = 10, fontsize_row = fontsize, + fontsize_col = fontsize, display_numbers = F, number_format = "\%.2f", + number_color = "grey30", fontsize_number = 0.8 * fontsize, + gaps_row = NULL, gaps_col = NULL, labels_row = NULL, + labels_col = NULL, filename = NA, width = NA, height = NA, + silent = FALSE, ...) +} +\arguments{ +\item{mat}{numeric matrix of the values to be plotted.} + +\item{color}{vector of colors used in heatmap.} + +\item{kmeans_k}{the number of kmeans clusters to make, if we want to aggregate the +rows before drawing heatmap. If NA then the rows are not aggregated.} + +\item{breaks}{a sequence of numbers that covers the range of values in mat and is one +element longer than color vector. Used for mapping values to colors. Useful if specific values need +to be mapped to specific colors. 
If value is NA then the +breaks are calculated automatically.} + +\item{border_color}{color of cell borders on heatmap, use NA if no border should be +drawn.} + +\item{cellwidth}{individual cell width in points. If left as NA, then the values +depend on the size of plotting window.} + +\item{cellheight}{individual cell height in points. If left as NA, +then the values depend on the size of plotting window.} + +\item{scale}{character indicating if the values should be centered and scaled in +either the row direction or the column direction, or none. Corresponding values are +\code{"row"}, \code{"column"} and \code{"none"}.} + +\item{cluster_rows}{boolean value determining if rows should be clustered.} + +\item{cluster_cols}{boolean value determining if columns should be clustered.} + +\item{clustering_distance_rows}{distance measure used in clustering rows. Possible +values are \code{"correlation"} for Pearson correlation and all the distances +supported by \code{\link{dist}}, such as \code{"euclidean"}, etc. If the value is none +of the above it is assumed that a distance matrix is provided.} + +\item{clustering_distance_cols}{distance measure used in clustering columns. Possible +values are the same as for \code{clustering_distance_rows}.} + +\item{clustering_method}{clustering method used. Accepts the same values as +\code{\link{hclust}}.} + +\item{clustering_callback}{callback function to modify the clustering. Is +called with two parameters: original \code{hclust} object and the matrix +used for clustering. Must return a \code{hclust} object.} + +\item{cutree_rows}{number of clusters the rows are divided into, based on the + hierarchical clustering (using cutree). If rows are not clustered, the +argument is ignored.} + +\item{cutree_cols}{similar to \code{cutree_rows}, but for columns.} + +\item{treeheight_row}{the height of a tree for rows, if these are clustered. +Default value 50 points.} + +\item{treeheight_col}{the height of a tree for columns, if these are clustered. 
+Default value 50 points.} + +\item{legend}{logical to determine if legend should be drawn or not.} + +\item{legend_breaks}{vector of breakpoints for the legend.} + +\item{legend_labels}{vector of labels for the \code{legend_breaks}.} + +\item{annotation_row}{data frame that specifies the annotations shown on left + side of the heatmap. Each row defines the features for a specific row. The +rows in the data and in the annotation are matched using corresponding row + names. Note that the color scheme takes into account whether the variable is continuous + or discrete.} + +\item{annotation_col}{similar to annotation_row, but for columns.} + +\item{annotation}{deprecated parameter that currently sets the annotation_col if it is missing.} + +\item{annotation_colors}{list for specifying annotation_row and +annotation_col track colors manually. It is possible to define the colors +for only some of the features. Check examples for details.} + +\item{annotation_legend}{boolean value showing if the legend for annotation +tracks should be drawn.} + +\item{drop_levels}{logical to determine if unused levels are also shown in +the legend} + +\item{show_rownames}{boolean specifying if row names are to be shown.} + +\item{show_colnames}{boolean specifying if column names are to be shown.} + +\item{main}{the title of the plot} + +\item{fontsize}{base fontsize for the plot} + +\item{fontsize_row}{fontsize for rownames (Default: fontsize)} + +\item{fontsize_col}{fontsize for colnames (Default: fontsize)} + +\item{display_numbers}{logical determining if the numeric values are also printed to +the cells. If this is a matrix (with same dimensions as original matrix), the contents +of the matrix are shown instead of original values.} + +\item{number_format}{format strings (C printf style) of the numbers shown in cells. 
+For example "\code{\%.2f}" shows 2 decimal places and "\code{\%.1e}" shows exponential +notation (see more in \code{\link{sprintf}}).} + +\item{number_color}{color of the text} + +\item{fontsize_number}{fontsize of the numbers displayed in cells} + +\item{gaps_row}{vector of row indices that show where to put gaps into +heatmap. Used only if the rows are not clustered. See \code{cutree_rows} +to see how to introduce gaps to clustered rows.} + +\item{gaps_col}{similar to gaps_row, but for columns.} + +\item{labels_row}{custom labels for rows that are used instead of rownames.} + +\item{labels_col}{similar to labels_row, but for columns.} + +\item{filename}{file path where to save the picture. Filetype is decided by +the extension in the path. Currently the following formats are supported: png, pdf, tiff, + bmp, jpeg. Even if the plot does not fit into the plotting window, the file size is +calculated so that the plot would fit there, unless specified otherwise.} + +\item{width}{manual option for determining the output file width in inches.} + +\item{height}{manual option for determining the output file height in inches.} + +\item{silent}{do not draw the plot (useful when using the gtable output)} + +\item{\dots}{graphical parameters for the text used in plot. Parameters passed to +\code{\link{grid.text}}, see \code{\link{gpar}}.} +} +\value{ +Invisibly a list of components +\itemize{ + \item \code{tree_row} the clustering of rows as \code{\link{hclust}} object + \item \code{tree_col} the clustering of columns as \code{\link{hclust}} object + \item \code{kmeans} the kmeans clustering of rows if parameter \code{kmeans_k} was +specified +} +} +\description{ +A function to draw clustered heatmaps where one has better control over some graphical +parameters such as cell size, etc. +} +\details{ +The function also allows aggregating the rows using kmeans clustering. 
This is +advisable if number of rows is so big that R cannot handle their hierarchical +clustering anymore, roughly more than 1000. Instead of showing all the rows +separately one can cluster the rows in advance and show only the cluster centers. +The number of clusters can be tuned with parameter kmeans_k. +} +\examples{ +# Create test matrix +test = matrix(rnorm(200), 20, 10) +test[1:10, seq(1, 10, 2)] = test[1:10, seq(1, 10, 2)] + 3 +test[11:20, seq(2, 10, 2)] = test[11:20, seq(2, 10, 2)] + 2 +test[15:20, seq(2, 10, 2)] = test[15:20, seq(2, 10, 2)] + 4 +colnames(test) = paste("Test", 1:10, sep = "") +rownames(test) = paste("Gene", 1:20, sep = "") + +# Draw heatmaps +pheatmapwh(test) +pheatmapwh(test, kmeans_k = 2) +pheatmapwh(test, scale = "row", clustering_distance_rows = "correlation") +pheatmapwh(test, color = colorRampPalette(c("navy", "white", "firebrick3"))(50)) +pheatmapwh(test, cluster_row = FALSE) +pheatmapwh(test, legend = FALSE) + +# Show text within cells +pheatmapwh(test, display_numbers = TRUE) +pheatmapwh(test, display_numbers = TRUE, number_format = "\\\%.1e") +pheatmapwh(test, display_numbers = matrix(ifelse(test > 5, "*", ""), nrow(test))) +pheatmapwh(test, cluster_row = FALSE, legend_breaks = -1:4, legend_labels = c("0", +"1e-4", "1e-3", "1e-2", "1e-1", "1")) + +# Fix cell sizes and save to file with correct size +pheatmapwh(test, cellwidth = 15, cellheight = 12, main = "Example heatmap") +pheatmapwh(test, cellwidth = 15, cellheight = 12, fontsize = 8, filename = "test.pdf") + +# Generate annotations for rows and columns +annotation_col = data.frame( + CellType = factor(rep(c("CT1", "CT2"), 5)), + Time = 1:5 + ) +rownames(annotation_col) = paste("Test", 1:10, sep = "") + +annotation_row = data.frame( + GeneClass = factor(rep(c("Path1", "Path2", "Path3"), c(10, 4, 6))) + ) +rownames(annotation_row) = paste("Gene", 1:20, sep = "") + +# Display row and color annotations +pheatmapwh(test, annotation_col = annotation_col) +pheatmapwh(test, 
annotation_col = annotation_col, annotation_legend = FALSE) +pheatmapwh(test, annotation_col = annotation_col, annotation_row = annotation_row) + + +# Specify colors +ann_colors = list( + Time = c("white", "firebrick"), + CellType = c(CT1 = "#1B9E77", CT2 = "#D95F02"), + GeneClass = c(Path1 = "#7570B3", Path2 = "#E7298A", Path3 = "#66A61E") +) + +pheatmapwh(test, annotation_col = annotation_col, annotation_colors = ann_colors, main = "Title") +pheatmapwh(test, annotation_col = annotation_col, annotation_row = annotation_row, + annotation_colors = ann_colors) +pheatmapwh(test, annotation_col = annotation_col, annotation_colors = ann_colors[2]) + +# Gaps in heatmaps +pheatmapwh(test, annotation_col = annotation_col, cluster_rows = FALSE, gaps_row = c(10, 14)) +pheatmapwh(test, annotation_col = annotation_col, cluster_rows = FALSE, gaps_row = c(10, 14), + cutree_col = 2) + +# Show custom strings as row/col names +labels_row = c("", "", "", "", "", "", "", "", "", "", "", "", "", "", "", +"", "", "Il10", "Il15", "Il1b") + +pheatmapwh(test, annotation_col = annotation_col, labels_row = labels_row) + +# Specifying clustering from distance matrix +drows = dist(test, method = "minkowski") +dcols = dist(t(test), method = "minkowski") +pheatmapwh(test, clustering_distance_rows = drows, clustering_distance_cols = dcols) + +# Modify ordering of the clusters using clustering callback option +callback = function(hc, mat){ + sv = svd(t(mat))$v[,1] + dend = reorder(as.dendrogram(hc), wts = sv) + as.hclust(dend) +} + +pheatmapwh(test, clustering_callback = callback) + +\dontrun{ +# Same using dendsort package +library(dendsort) + +callback = function(hc, ...){dendsort(hc)} +pheatmapwh(test, clustering_callback = callback) +} +} +\author{ +Raivo Kolde <rkolde@gmail.com> +} +