correlation.R
This function creates a publication-ready correlation table. If used correctly, the output displays a diagonally formatted correlation table with mean and standard deviation as the first rows and the cronbachs alpha values on the diagonal, if desired. See here how to use it.
Basic R function to call:
library(devtools)
source_url("https://lucasmaunz.org/download/correlation.R")
Alternatively: Download this script.
#Full credit to:
#https://paulvanderlaken.com/2020/07/28/publication-ready-correlation-matrix-significance-r/#correlation_matrix
correlation_matrix <- function(df,
type = "pearson",
digits = 3,
decimal.mark = ".",
use = "all",
show_significance = TRUE,
replace_diagonal = FALSE,
replacement = ""){
# check arguments
stopifnot({
is.numeric(digits)
digits >= 0
use %in% c("all", "upper", "lower")
is.logical(replace_diagonal)
is.logical(show_significance)
is.character(replacement)
})
# we need the Hmisc package for this
require(Hmisc)
# retain only numeric and boolean columns
isNumericOrBoolean = vapply(df, function(x) is.numeric(x) | is.logical(x), logical(1))
if (sum(!isNumericOrBoolean) > 0) {
cat('Dropping non-numeric/-boolean column(s):', paste(names(isNumericOrBoolean)[!isNumericOrBoolean], collapse = ', '), '\n\n')
}
df = df[isNumericOrBoolean]
# transform input data frame to matrix
x <- as.matrix(df)
# run correlation analysis using Hmisc package
correlation_matrix <- Hmisc::rcorr(x, type = type)
R <- correlation_matrix$r # Matrix of correlation coeficients
p <- correlation_matrix$P # Matrix of p-value
# transform correlations to specific character format
Rformatted = formatC(R, format = 'f', digits = digits, decimal.mark = decimal.mark)
# if there are any negative numbers, we want to put a space before the positives to align all
if (sum(!is.na(R) & R < 0) > 0) {
Rformatted = ifelse(!is.na(R) & R > 0, paste0(" ", Rformatted), Rformatted)
}
# add significance levels if desired
if (show_significance) {
# define notions for significance levels; spacing is important.
stars <- ifelse(is.na(p), "", ifelse(p < .001, "**", ifelse(p < .01, "**", ifelse(p < .05, "*", ""))))
Rformatted = paste0(Rformatted, stars)
}
# make all character strings equally long
max_length = max(nchar(Rformatted))
Rformatted = vapply(Rformatted, function(x) {
current_length = nchar(x)
difference = max_length - current_length
return(paste0(x, paste(rep(" ", difference), collapse = ''), sep = ''))
}, FUN.VALUE = character(1))
# build a new matrix that includes the formatted correlations and their significance stars
Rnew <- matrix(Rformatted, ncol = ncol(x))
rownames(Rnew) <- colnames(Rnew) <- colnames(x)
# replace undesired values
if (use == 'upper') {
Rnew[lower.tri(Rnew, diag = replace_diagonal)] <- replacement
} else if (use == 'lower') {
Rnew[upper.tri(Rnew, diag = replace_diagonal)] <- replacement
} else if (replace_diagonal) {
diag(Rnew) <- replacement
}
return(Rnew)
}
save_correlation_matrix = function(df, filename, ...) {
return(write.csv2(correlation_matrix(df, ...), file = filename))
}