|
a |
|
b/man/dummy_cols.Rd |
|
|
1 |
% Generated by roxygen2: do not edit by hand |
|
|
2 |
% Please edit documentation in R/auxiliary.R |
|
|
3 |
\name{dummy_cols} |
|
|
4 |
\alias{dummy_cols} |
|
|
5 |
\title{Fast creation of dummy variables} |
|
|
6 |
\usage{ |
|
|
7 |
dummy_cols( |
|
|
8 |
.data, |
|
|
9 |
select_columns = NULL, |
|
|
10 |
remove_first_dummy = FALSE, |
|
|
11 |
remove_most_frequent_dummy = FALSE, |
|
|
12 |
ignore_na = FALSE, |
|
|
13 |
split = NULL, |
|
|
14 |
remove_selected_columns = FALSE, |
|
|
15 |
omit_colname_prefix = FALSE |
|
|
16 |
) |
|
|
17 |
} |
|
|
18 |
\arguments{ |
|
|
19 |
\item{.data}{An object with the data set you want to make dummy columns from.} |
|
|
20 |
|
|
|
21 |
\item{select_columns}{Vector of column names that you want to create dummy variables from. |
|
|
22 |
If NULL (default), uses all character and factor columns.} |
|
|
23 |
|
|
|
24 |
\item{remove_first_dummy}{Removes the first dummy of every variable such that only n-1 dummies remain. |
|
|
25 |
This avoids multicollinearity issues in models.} |
|
|
26 |
|
|
|
27 |
\item{remove_most_frequent_dummy}{Removes the most frequently observed category such that only n-1 dummies |
|
|
28 |
remain. If there is a tie for most frequent, will remove the first |
|
|
29 |
(by alphabetical order) category that is tied for most frequent.} |
|
|
30 |
|
|
|
31 |
\item{ignore_na}{If TRUE, ignores any NA values in the column. If FALSE (default), then it |
|
|
32 |
will make a dummy column for value_NA and give a 1 in any row which has an |
|
|
33 |
NA value.} |
|
|
34 |
|
|
|
35 |
\item{split}{A string to split a column when multiple categories are in the cell. For |
|
|
36 |
example, if a variable is Pets and the rows are "cat", "dog", and "turtle", |
|
|
37 |
each of these pets would become its own dummy column. If one row is "cat, dog", |
|
|
38 |
then with a split value of "," this row would have a value of 1 for both the cat |
|
|
39 |
and dog dummy columns.} |
|
|
40 |
|
|
|
41 |
\item{remove_selected_columns}{If TRUE (not default), removes the columns used to generate the dummy columns.} |
|
|
42 |
|
|
|
43 |
\item{omit_colname_prefix}{If TRUE (not default) and \code{length(select_columns) == 1}, omit prepending the |
|
|
44 |
name of \code{select_columns} to the names of the newly generated dummy columns.} |
|
|
45 |
} |
|
|
46 |
\value{ |
|
|
47 |
A data.frame (or tibble or data.table, depending on input data type) with |
|
|
48 |
the same number of rows as the inputted data and the original columns plus the newly |
|
|
49 |
created dummy columns. |
|
|
50 |
} |
|
|
51 |
\description{ |
|
|
52 |
Quickly create dummy (binary) columns from character and |
|
|
53 |
factor type columns in the inputted data (and numeric columns if specified). |
|
|
54 |
This function is useful for statistical analysis when you want binary |
|
|
55 |
columns rather than character columns. Adapted from the \code{fastDummies} package (\url{https://jacobkap.github.io/fastDummies/}). |
|
|
56 |
} |