# --- Get Results from Mash Object with mashr get functions --------- #' @title Get current date-time in a filename-appropriate format. #' #' @description Converts the current \code{Sys.time()} system time to a format #' that is acceptable to include in a filename. Changes punctuation that #' won't work in a filename. #' #' @return A string containing the current date-time with spaces and colons #' replaced with underscores and periods, respectively. #' #' @importFrom stringr str_replace_all get_date_filename <- function(){ str_replace_all(str_replace_all(Sys.time(), ":", "."), " ", "_") } #' Get number of conditions #' #' @param m The mash result #' #' @importFrom ashr get_pm get_ncond = function(m){ return(ncol(get_pm(m))) } #' Return the Bayes Factor for each effect #' #' @param m the mash result (from joint or 1by1 analysis); must have been #' computed using usepointmass = TRUE #' #' @return if m was fitted using usepointmass=TRUE then returns a vector of #' the log10(bf) values for each effect. That is, the jth element #' lbf_j is log10(Pr(Bj | g = ghat-nonnull)/Pr(Bj | g = 0)) where gha #' t-nonnull is the non-null part of ghat. Otherwise returns NULL. #' get_log10bf = function(m) { if(is.null(m$null_loglik)){ return(NULL) } else { return((m$alt_loglik - m$null_loglik)/log(10)) } } #' From a mash result, get effects that are significant in at least #' one condition. #' #' @param m the mash result (from joint or 1by1 analysis) #' @param thresh indicates the threshold below which to call signals significant #' @param conditions which conditions to include in check (default to all) #' @param sig_fn the significance function used to extract significance from #' mash object; eg could be ashr::get_lfsr or ashr::get_lfdr. (Small values #' must indicate significant.) #' #' @return a vector containing the indices of the significant effects, by #' order of most significant to least #' #' @importFrom ashr get_lfsr #' #' @export get_significant_results = function(m, thresh = 0.05, conditions = NULL, sig_fn = ashr::get_lfsr) { if (is.null(conditions)) { conditions = 1:get_ncond(m) } top = apply(sig_fn(m)[,conditions,drop=FALSE],1,min) # find top effect # in each condition sig = which(top < thresh) ord = order(top[sig],decreasing=FALSE) sig[ord] } #' @title Get mash marker_df #' #' @description Pulls SNP markers information in the mash object from a bigsnp #' object. #' #' @param m An object of type mash #' @param snp A bigSNP object, produced by the bigsnpr package. Here, the WAMI #' SNP information. #' #' @importFrom magrittr %>% #' @importFrom rlang .data #' @importFrom tibble enframe tibble #' @importFrom dplyr arrange row_number mutate left_join #' get_marker_df <- function(m, snp){ if(attr(snp, "class") != "bigSNP"){ stop("snp needs to be a bigSNP object, produced by the bigsnpr package.") } markers <- tibble(CHR = snp$map$chromosome, POS = snp$map$physical.pos, marker.ID = snp$map$marker.ID) %>% mutate(CHRN = as.numeric(as.factor(.data$CHR)), CHR = as.factor(.data$CHR), value = row_number()) marker_df <- get_significant_results(m, thresh = 1) %>% enframe(name = NULL, value = "value") %>% arrange(.data$value) %>% left_join(markers, by = "value") return(marker_df) } #' @title Return the estimated mixture proportions. Use get_estimated_pi to #' extract the estimates of the mixture proportions for different types of #' covariance matrix. This tells you which covariance matrices have most of #' the mass. #' #' @param m the mash result #' @param dimension indicates whether you want the mixture proportions for the #' covariances, grid, or all #' #' @return a named vector containing the estimated mixture proportions. #' #' @details If the fit was done with `usepointmass=TRUE` then the first #' element of the returned vector will correspond to the null, and the #' remaining elements to the non-null covariance matrices. Suppose the fit #' was done with $K$ covariances and a grid of length $L$. If #' `dimension=cov` then the returned vector will be of length $K$ #' (or $K+1$ if `usepointmass=TRUE`). If `dimension=grid` then #' the returned vector will be of length $L$ (or $L+1$). If #' `dimension=all` then the returned vector will be of length $LK$ (or #' $LK+1$). The names of the vector will be informative for which #' combination each element corresponds to. #' #' @importFrom ashr get_fitted_g #' get_estimated_pi = function(m, dimension = c("cov","grid","all")){ dimension = match.arg(dimension) if(dimension=="all"){ get_estimated_pi_no_collapse(m) } else { g = get_fitted_g(m) pihat = g$pi pihat_names = NULL pi_null = NULL if(g$usepointmass){ pihat_names=c("null",pihat_names) pi_null = pihat[1] pihat = pihat[-1] } pihat = matrix(pihat,nrow=length(g$Ulist)) if(dimension=="cov"){ pihat = rowSums(pihat) pihat_names = c(pihat_names,names(g$Ulist)) } else if(dimension=="grid"){ pihat = colSums(pihat) pihat_names = c(pihat_names,1:length(g$grid)) } pihat = c(pi_null,pihat) names(pihat) = pihat_names return(pihat) } } get_estimated_pi_no_collapse = function(m){ g = get_fitted_g(m) pihat = g$pi names(pihat) = names(expand_cov(g$Ulist, g$grid, g$usepointmass)) pihat } #' @title Create expanded list of covariance matrices expanded by #' grid, Sigma_{lk} = omega_l U_k #' #' @description This is an internal (non-exported) function. This help #' page provides additional documentation mainly intended for #' developers and expert users. #' #' @param Ulist a list of covarance matrices #' #' @param grid a grid of scalar values by which the covariance #' matrices are to be sc #' #' @param usepointmass if TRUE adds a point mass at 0 (null component) #' to the list #' #' @return This takes the covariance matrices in Ulist and multiplies #' them by the grid values If usepointmass is TRUE then it adds a null #' component. #' #' @keywords internal #' expand_cov = function(Ulist, grid, usepointmass = TRUE){ scaled_Ulist = scale_cov(Ulist, grid) R = nrow(Ulist[[1]]) if(usepointmass){ scaled_Ulist = c(list(null=matrix(0,nrow=R,ncol=R)),scaled_Ulist) } return(scaled_Ulist) } #' @title Scale each covariance matrix in list Ulist by a scalar in #' vector grid #' #' @description This is an internal (non-exported) function. This help #' page provides additional documentation mainly intended for #' developers and expert users. #' #' @param Ulist a list of matrices #' #' @param grid a vector of scaling factors (standard deviaions) #' #' @return a list with length length(Ulist)*length(grid) #' #' @keywords internal #' scale_cov = function(Ulist, grid){ orig_names = names(Ulist) Ulist = unlist( lapply(grid^2, function(x){multiply_list(Ulist,x)}), recursive = FALSE) names(Ulist) = unlist( lapply(1:length(grid), function(x){paste0(orig_names,".",x)}), recursive = FALSE) return(Ulist) } # Multiply each element of a list by scalar. (In our application each # element of the list is a matrix.) multiply_list = function(Ulist, x){lapply(Ulist, function(U){x*U})} #' @title Get column names from a mash object #' #' @description This function extracts the column names from the local false #' sign rate table of a mash object's results. This can tell you the condition #' names or phenotype names used in the mash object. That can be useful for #' looking at a subset of these columns, say. #' #' @param m An object of type mash #' #' @return A vector of phenotype names #' #' @examples #' \dontrun{get_colnames(m = mash_obj)} #' get_colnames <- function(m){ column_names <- colnames(m$result$lfsr) return(column_names) } get_Ulist <- function(m){ Ulist <- m$fitted_g$Ulist return(Ulist) } #' Count number of conditions each effect is significant in #' #' @param m the mash result (from joint or 1by1 analysis) #' @param thresh indicates the threshold below which to call signals significant #' @param conditions which conditions to include in check (default to all) #' @param sig_fn the significance function used to extract significance from mash object; eg could be ashr::get_lfsr or ashr::get_lfdr #' #' @return a vector containing the number of significant conditions #' get_n_significant_conditions = function(m, thresh = 0.05, conditions = NULL, sig_fn = get_lfsr){ if (is.null(conditions)) { conditions = 1:get_ncond(m) } return(apply(sig_fn(m)[,conditions,drop=FALSE] < thresh, 1, sum)) } #' Compute the proportion of (significant) signals shared by magnitude in each pair of conditions, based on the poterior mean #' #' @param m the mash fit #' @param factor a number between 0 and 1 - the factor within which effects are #' considered to be shared. #' @param lfsr_thresh the lfsr threshold for including an effect in the #' assessment #' @param FUN a function to be applied to the estimated effect sizes before #' assessing sharing. The most obvious choice beside the default #' 'FUN=identity' would be 'FUN=abs' if you want to ignore the sign of the #' effects when assesing sharing. #' @details For each pair of tissues, first identify the effects that are #' significant (by lfsr factor & ratio < (1/factor)) } } S[lower.tri(S, diag = FALSE)] = t(S)[lower.tri(S, diag = FALSE)] colnames(S) = row.names(S) = colnames(m$result$PosteriorMean) return(S) } #' From a mash result, get effects that are significant in at least one condition #' #' @param m the mash result (from joint or 1by1 analysis) #' @param thresh indicates the threshold below which to call signals significant #' @param conditions which conditions to include in check (default to all) #' @param sig_fn the significance function used to extract significance from mash object; eg could be ashr::get_lfsr or ashr::get_lfdr. (Small values must indicate significant.) #' #' @return a vector containing the indices of the significant effects, by order of most significant to least #' #' @importFrom ashr get_lfsr #' #' @export get_significant_results = function(m, thresh = 0.05, conditions = NULL, sig_fn = ashr::get_lfsr) { if (is.null(conditions)) { conditions = 1:get_ncond(m) } top = apply(sig_fn(m)[,conditions, drop = FALSE], 1, min) # find top effect in each condition sig = which(top < thresh) ord = order(top[sig], decreasing = FALSE) sig[ord] } #' @title Get data frames of types of GxE from a mash object #' #' @description Performs set operations to determine pairwise GxE for effects #' from a mash object. #' #' @param m An object of type mash #' @param thresh Numeric. The threshold for including an effect in the assessment #' @param factor a number between 0 and 1. The factor within which effects are #' considered to be shared. #' #' @return A list containing eight data frames. Those with names that start #' "S_" contain significant effects of different types between pairs of #' named rows and columns. S_all_pairwise contains all significant effects; #' NS_pairwise contains all non-significant effects. S_CN contains effects #' significant in only one condition, and effects with a significantly #' different magnitude (differential sensitivity). This dataframe is not #' conservative using the local false sign rate test - we can't determine #' the sign of one of the effects for effects significant in only one #' condition - so it's not recommended to use this, but included. S_2_no #' contains effects significant in both conditions that do not differ #' significantly in magnitude. These effects do not have GxE. S_AP contains #' effects significant in both conditions that differ in their sign - and #' have antagonistic pleiotropy. S_DS contains effects significant in both #' conditions that differ in the magnitude of their effect, but not their #' sign - differentially sensitive alleles. S_1_row and S_1_col contain #' effects that are significant in just one of the two conditions - the row #' or the column, respectively. #' #' @importFrom dplyr between mutate filter #' @importFrom tibble enframe #' @importFrom magrittr %>% #' @importFrom rlang .data #' #' @export get_GxE = function(m, factor = 0.4, thresh = 0.05){ R = get_ncond(m) # Effects to consider S_all = matrix(NA, nrow = R, ncol = R, dimnames = list(get_colnames(m), get_colnames(m))) S_2_no = matrix(NA, nrow = R, ncol = R, dimnames = list(get_colnames(m), get_colnames(m))) S_CN = matrix(NA, nrow = R, ncol = R, dimnames = list(get_colnames(m), get_colnames(m))) S_AP = matrix(NA, nrow = R, ncol = R, dimnames = list(get_colnames(m), get_colnames(m))) S_DS = matrix(NA, nrow = R, ncol = R, dimnames = list(get_colnames(m), get_colnames(m))) NS_pair = matrix(NA, nrow = R, ncol = R, dimnames = list(get_colnames(m), get_colnames(m))) S_i = matrix(NA, nrow = R, ncol = R, dimnames = list(get_colnames(m), get_colnames(m))) S_j = matrix(NA, nrow = R, ncol = R, dimnames = list(get_colnames(m), get_colnames(m))) for(i in 1:R){ for(j in 1:R){ if(i == j){ S_all[i, j] = length(get_significant_results(m, thresh = thresh, conditions = i)) S_CN[i, j] = 0 # Not conservative!! S_2_no[i, j] = length(get_significant_results(m, thresh = thresh, conditions = i)) S_AP[i, j] = 0 S_DS[i, j] = 0 sig_i = get_significant_results(m, thresh = thresh, conditions = i) all_i = get_significant_results(m, thresh = 1, conditions = i) ns_i = dplyr::setdiff(all_i, sig_i) # effects that aren't sig in i NS_pair[i, j] = length(ns_i) S_i[i, j] = 0 S_j[i, j] = 0 } else { sig_i = get_significant_results(m, thresh = thresh, conditions = i) sig_j = get_significant_results(m, thresh = thresh, conditions = j) all_i = get_significant_results(m, thresh = 1, conditions = i) all_j = get_significant_results(m, thresh = 1, conditions = j) ns_i = setdiff(all_i, sig_i) # effects that aren't sig in i ns_j = setdiff(all_j, sig_j) # effects that aren't sig in j # Markers where we aren't sure of the sign in either condition # aka most of the effects NS_pair[i,j] <- length(intersect(ns_i, ns_j)) # Markers where we are sure of the sign in just one condition # Markers significant in i but not in j ms_isigi = intersect(sig_i, ns_j) # Markers significant in j but not in i ms_jsigj = intersect(sig_j, ns_i) # Markers where we are sure of the sign in both conditions effects_df <- get_pm(m)[union(sig_i, sig_j), i] %>% enframe(name = "Marker", value = "Effect_i") %>% mutate(Effect_j = get_pm(m)[union(sig_i, sig_j), j], ratio = .data$Effect_i/.data$Effect_j, ##divide effect sizes, if this ratio is positive there is not AP APratio = .data$Effect_i/-.data$Effect_j) ##divide effect sizes, if this ratio is positive there is AP ## GxE: we are sure of the sign for two effects, and they are the same sign # No GxE in this pair - effects are same sign and same mag ms_sig2_noGxE <- effects_df %>% filter(between(.data$ratio, factor, 1/factor)) # DS: we are sure of the sign for two effects, and they are the same sign ms_sig2_DS <- effects_df %>% filter(.data$ratio > 0 & !between(.data$ratio, factor, 1/factor)) ## GxE we are sure of the sign for two effects, and they are opposite # AP: we are sure of the sign for two effects, and they are opposite ms_sig2_AP <- effects_df %>% filter(between(.data$APratio, 0, 1E10)) S_all[i, j] = sum(length(ms_isigi), length(ms_jsigj), nrow(ms_sig2_noGxE), nrow(ms_sig2_DS), nrow(ms_sig2_AP)) S_CN[i, j] = sum(length(ms_isigi), length(ms_jsigj), nrow(ms_sig2_DS)) # Not conservative!! S_2_no[i, j] = sum(nrow(ms_sig2_noGxE)) S_AP[i, j] = sum(nrow(ms_sig2_AP)) S_DS[i, j] = sum(nrow(ms_sig2_DS)) S_i[i, j] = length(ms_isigi) S_j[i, j] = length(ms_jsigj) } } } return(list(S_all_pairwise = S_all, S_CN = S_CN, S_2_no = S_2_no, S_AP = S_AP, S_DS = S_DS, NS_pairwise = NS_pair, S_1_row = S_i, S_1_col = S_j)) } #' @title Get the positions of objects in a mash object Ulist that are above #' some mass threshold. #' #' @description Get the positions of objects in a mash object Ulist that are #' above some mass threshold. #' #' @param m An object of type mash #' @param thresh Numeric. The mass threshold for including a covariance matrix #' #' @export get_U_by_mass <- function(m, thresh = 0.05){ umass <- mash_plot_covar(m, saveoutput = FALSE) mass_thresh <- umass$covar_df$`Covariance Matrix`[which(umass$covar_df$Mass >= thresh)] Ulist_order <- names(get_Ulist(m)) range <- which(Ulist_order %in% mass_thresh) return(range) } #' @title Reorder correlation matrix #' #' @description Reorder correlation coefficients from a matrix of things #' (including NA's) and hierarchically cluster them #' #' @param cormat A correlation matrix #' #' @importFrom cluster daisy #' @importFrom stats hclust #' reorder_cormat <- function(cormat){ # Use correlation between variables as distance dd <- daisy(cormat, metric = "gower") hc <- hclust(dd) cormat <- cormat[hc$order, hc$order] } # --- Plot & Save Plots --------- #' ggplot of single mash effect #' #' @description Creates a plot with point estimates and standard errors for #' effects of a single SNP in multiple conditions. #' #' @param m An object of type mash #' @param snp A bigSNP object, produced by the bigsnpr package. Here, #' the WAMI SNP information. #' @param n Optional. Integer or integer vector. The result number to plot, in #' order of significance. 1 would be the top result, for example. Find #' these with \code{\link{get_significant_results}}. #' @param i Optional. Integer or integer vector. The result number to plot, in #' the order of the mash object. 1 would be the first marker in the mash #' object, for example. Find these with \code{\link{get_marker_df}}. #' @param marker Optional. Print the marker name on the plot? #' @param saveoutput Logical. Should the output be saved to the path? #' @param suffix Character. Optional. A unique suffix used to save the files, #' instead of the current date & time. #' #' @note Specify only one of n or i. #' #' @importFrom ashr get_psd #' @importFrom cowplot save_plot #' @importFrom tibble enframe #' @importFrom dplyr mutate case_when #' @importFrom tidyr separate #' @import ggplot2 #' @importFrom purrr as_vector #' @importFrom stringr str_replace str_replace_all str_sub str_length #' #' @export mash_plot_marker_effect <- function(m, snp = snp, n = NA, i = NA, marker = TRUE, saveoutput = FALSE, suffix = ""){ stopifnot((typeof(n) %in% c("double", "integer") | typeof(i) %in% c("double", "integer"))) if(attr(snp, "class") != "bigSNP"){ stop("snp needs to be a bigSNP object, produced by the bigsnpr package.") } if(typeof(n) != "logical"){ i <- get_significant_results(m)[n] marker_df <- tibble(CHR = snp$map$chromosome[i], POS = snp$map$physical.pos[i], marker.ID = snp$map$marker.ID[i]) marker_name <- marker_df$marker.ID } else { marker_df <- get_marker_df(m, snp)[i,] marker_name <- marker_df$marker.ID } effectplot <- get_colnames(m) %>% enframe(name = NULL, value = "Conditions") %>% mutate(mn = get_pm(m)[i,], se = get_psd(m)[i,]) %>% mutate(Conditions = str_replace(.data$Conditions, "(Stand_)??Bhat_?", ""), Conditions = str_replace(.data$Conditions, "-mean$", "") ) ggobject <- ggplot(data = effectplot) + geom_point(mapping = aes(x = as.factor(.data$Conditions), y = .data$mn)) + geom_errorbar(mapping = aes(ymin = .data$mn - .data$se, ymax = .data$mn + .data$se, x = .data$Conditions), width = 0.3) + geom_hline(yintercept = 0, lty = 2) + labs(x = "Conditions", y = "Effect Size") + theme_classic() + theme(axis.text.x = element_text(size = 10, angle = 45, vjust = 1, hjust = 1), panel.background = element_rect(fill=NA), legend.position = "none", axis.title = element_text(size = 10), axis.text.y = element_text(size = 10), axis.line.x = element_line(size = 0.35, colour = 'grey50'), axis.line.y = element_line(size = 0.35, colour = 'grey50'), axis.ticks = element_line(size = 0.25, colour = 'grey50'), legend.justification = c(1, 0.75), legend.key.size = unit(0.35, 'cm'), legend.title = element_blank(), legend.text = element_text(size = 9), legend.text.align = 0, legend.background = element_blank(), plot.subtitle = element_text(size = 10, vjust = 0), strip.background = element_blank(), strip.text = element_text(hjust = 0.5, size = 10, vjust = 0), strip.placement = 'outside', panel.spacing.x = unit(-0., 'cm')) if(str_length(effectplot$Conditions[1]) > 10){ ggobject <- ggobject + theme(plot.margin = unit(c(0.2, 0.2, 0.5, 1.5), 'cm'),) } if(marker == TRUE){ ggobject <- ggobject + ggtitle(label = marker_name) } if(saveoutput == TRUE){ if(!(str_sub(suffix, end = 1) %in% c("", "_"))){ suffix <- paste0("_", suffix) } if(str_sub(suffix, end = 1) %in% c("")){ suffix <- paste0("_", get_date_filename()) } save_plot(filename = paste0("Effect_plot_", str_replace_all(marker_name, " ", "_"), suffix, ".png"), plot = ggobject, base_aspect_ratio = nrow(effectplot)*0.1 + 0.1, base_height = 3.5) } return(list(marker = marker_name, effect_df = effectplot, ggobject = ggobject)) } #' ggplot of covariance matrix masses #' #' @description Creates a bar plot using ggplot of the masses that are on each #' covariance matrix specified in the mash model. #' #' @param m An object of type mash #' @param saveoutput Logical. Should the output be saved to the path? #' @param suffix Character. Optional. A unique suffix used to save the files, #' instead of the current date & time. #' #' @importFrom cowplot save_plot #' @importFrom tibble enframe #' @importFrom dplyr mutate arrange desc #' @importFrom stringr str_replace str_sub #' @import ggplot2 #' #' @note This plot can be useful for seeing the overall patterns of effects in #' the data used in mash. Non-significant effects will add mass to the #' "no_effects" covariance matrix, while significant effects will add mass #' to one of the other covariance matrices. You can use mash_plot_Ulist() #' to plot the covariance matrix patterns themselves. #' #' @export mash_plot_covar <- function(m, saveoutput = FALSE, suffix = ""){ df <- get_estimated_pi(m) df <- enframe(df, name = "Covariance Matrix", value = "Mass") %>% mutate(`Covariance Matrix` = str_replace(.data$`Covariance Matrix`, "(Stand_)??Bhat_?", "single_effect_"), `Covariance Matrix` = str_replace(.data$`Covariance Matrix`, "^null$", "no_effects")) %>% arrange(desc(.data$`Covariance Matrix`)) df$`Covariance Matrix` <- factor(df$`Covariance Matrix`, levels = (df$`Covariance Matrix`)) ggobject <- ggplot(df) + geom_bar(aes(x = .data$Mass, y = .data$`Covariance Matrix`), stat = "identity") + theme_classic() + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), panel.background = element_rect(fill=NA), axis.title = element_text(size = 10), axis.text = element_text(size = 10), axis.line.x = element_line(size = 0.35, colour = 'grey50'), axis.line.y = element_line(size = 0.35, colour = 'grey50'), axis.ticks = element_line(size = 0.25, colour = 'grey50'), legend.justification = c(1, 0.75), legend.key.size = unit(0.35, 'cm'), legend.title = element_blank(), legend.text = element_text(size = 9), legend.text.align = 0, legend.background = element_blank(), plot.subtitle = element_text(size = 10, vjust = 0), strip.background = element_blank(), strip.text = element_text(hjust = 0.5, size = 10 ,vjust = 0), strip.placement = 'outside', panel.spacing.x = unit(-0.1, 'cm')) if(saveoutput == TRUE){ if(!(str_sub(suffix, end = 1) %in% c("", "_"))){ suffix <- paste0("_", suffix) } if(str_sub(suffix, end = 1) %in% c("")){ suffix <- paste0("_", get_date_filename()) } save_plot(paste0("Covariance_matrix_mass_plot", suffix, ".png"), plot = ggobject, base_aspect_ratio = 1, base_height = nrow(df)*.15) } return(list(covar_df = df, ggobject = ggobject)) } #' ggplot of specific covariance matrix patterns #' #' @description Creates a tile plot using ggplot of the covariance matrices #' specified in the mash model. #' #' @param m An object of type mash #' @param range Numeric vector. Which covariance matrices should be plotted? #' @param saveoutput Logical. Should the output be saved to the path? #' @param suffix Character. Optional. A unique suffix used to save the files, #' instead of the current date & time. #' @param limits Logical. Should there be plot limits of -1 and 1? Default is TRUE. #' @param labels Logical. Should each pairwise comparison proportion have a label? Default is TRUE. #' #' @return A list of dataframes used to make the tile plots and the plots #' themselves. #' #' @importFrom cowplot save_plot #' @importFrom tibble enframe as_tibble #' @importFrom dplyr mutate filter #' @importFrom rlist list.append #' @importFrom rlang .data #' @importFrom stringr str_replace str_sub #' @importFrom tidyr pivot_longer #' @import ggplot2 #' #' @export mash_plot_Ulist <- function(m, range = NA, saveoutput = FALSE, suffix = "", limits = TRUE, labels = TRUE){ Ulist <- get_Ulist(m) Ureturn <- list() stopifnot(typeof(range) %in% c("double", "integer", "logical")) if(typeof(range) == "logical"){ range <- seq_along(Ulist) } for(u in range){ U1 <- Ulist[[u]] # Remove half the tiles if the matrix is symmetric if(isSymmetric(U1)){ for(i in 1:nrow(U1)){ for(j in 1:ncol(U1)){ if(i < j){ U1[i, j] <- NA } } } } if(ncol(U1) == length(get_colnames(m))){ colnames(U1) <- str_replace(get_colnames(m), "(Stand_)??Bhat_?", "") %>% str_replace("-mean$", "") name_order <- str_replace(get_colnames(m), "(Stand_)??Bhat_?", "") %>% str_replace("-mean$", "") U1 <- as_tibble(U1, .name_repair = "unique") %>% mutate(rowU = str_replace(get_colnames(m), "(Stand_)??Bhat_?", ""), rowU = str_replace(.data$rowU, "-mean$", "")) %>% pivot_longer(cols = -.data$rowU, names_to = "colU", values_to = "covar") %>% filter(!is.na(.data$covar)) U1$rowU <- factor(U1$rowU, levels = name_order) U1$colU <- factor(U1$colU, levels = name_order) } else { U1 <- as_tibble(U1, rownames = "rowU", .name_repair = "unique") %>% pivot_longer(cols = -.data$rowU, names_to = "colU", values_to = "covar") %>% mutate(rowU = paste0("Condition", .data$rowU), colU = str_replace(.data$colU, "\\.\\.\\.", "Condition")) %>% filter(!is.na(.data$covar)) } if(limits == TRUE){ U1_covar <- U1 %>% ggplot(aes(x = .data$rowU, y = .data$colU)) + geom_tile(aes(fill = .data$covar), na.rm = TRUE) + scale_fill_gradientn(colors = c("#440154FF", "#3B528BFF", "#2C728EFF", "white", "#27AD81FF", "#5DC863FF", "#FDE725FF"), limits = c(-1,1)) + #geom_text(aes(label = round(.data$covar, 1)), color = "darkgrey") + theme(legend.position = "right", axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) + xlab("") + ylab("") + ggtitle(names(Ulist)[u]) } else { lim_lower <- min(U1$covar) lim_upper <- max(U1$covar) + max(U1$covar) * 0.01 U1_covar <- U1 %>% ggplot(aes(x = .data$rowU, y = .data$colU)) + geom_tile(aes(fill = .data$covar), na.rm = TRUE) + scale_fill_gradientn(colors = c("#440154FF", "#472D7BFF", "#3B528BFF", "#2C728EFF", "#21908CFF", "#27AD81FF", "#5DC863FF", "#AADC32FF", "#FDE725FF"), limits = c(lim_lower, lim_upper)) + #geom_text(aes(label = round(.data$covar, 1)), color = "darkgrey") + theme_classic() + theme(panel.background = element_rect(fill=NA), legend.position = "right", axis.title = element_text(size = 10), axis.text.y = element_text(size = 10), axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1), axis.line.x = element_line(size = 0.35, colour = 'grey50'), axis.line.y = element_line(size = 0.35, colour = 'grey50'), axis.ticks = element_line(size = 0.25, colour = 'grey50'), legend.justification = c(1, 0.75), legend.key.size = unit(0.35, 'cm'), legend.title = element_blank(), legend.text = element_text(size = 9), legend.text.align = 0, legend.background = element_blank(), plot.subtitle = element_text(size = 10, vjust = 0), strip.background = element_blank(), strip.text = element_text(hjust = 0.5, size = 10 ,vjust = 0), strip.placement = 'outside', panel.spacing.x = unit(-0.1, 'cm')) + xlab("") + ylab("") + ggtitle(names(Ulist)[u]) } if (labels == TRUE) { U1_covar <- U1_covar + geom_text(aes(label = round(.data$covar, 1)), color = "darkgrey") } Ureturn <- list.append(Ureturn, U1 = U1, ggobject = U1_covar) names(Ureturn)[-2] <- paste0(names(Ulist)[u], "_df") names(Ureturn)[-1] <- paste0(names(Ulist)[u], "_ggobject") if(saveoutput == TRUE){ if(!(str_sub(suffix, end = 1) %in% c("", "_"))){ suffix <- paste0("_", suffix) } if(str_sub(suffix, end = 1) %in% c("")){ suffix <- paste0("_", get_date_filename()) } save_plot(paste0("Covariances_plot_", names(Ulist)[u], suffix, ".png"), plot = U1_covar, base_height = length(get_colnames(m))*.18 + 1.2, base_asp = 1.35) } } return(Ureturn) } #' @title Manhattan plot in ggplot colored by significant conditions #' #' @description Takes a mash object and, for some vector of phenotypes, returns #' a Manhattan plot ggplot object (and its dataframe). Each SNP in the plot #' is colored by the number of phenotypes it is significant for. Even and #' odd chromosomes have different shapes for their SNPs, so that #' chromosome identity can be determined. #' #' @param m A mash object (outputted by mash). #' @param snp A bigSNP object, produced by the bigsnpr package. Here, #' the WAMI SNP information. #' @param cond A vector of phenotypes. Defaults to the names of each #' column in the mash object. #' @param saveoutput Logical. Should the output be saved to the path? #' @param suffix Character. Optional. A unique suffix used to save the files, #' instead of the current date & time. #' @param thresh Numeric. The threshold used for the local false sign rate to #' call significance in a condition. #' #' @return A \code{tbl_df()} of the data used to make the Manhattan plot, and a #' ggplot object containing the Manhattan. #' #' @importFrom cowplot save_plot #' @importFrom dplyr rename select arrange mutate left_join #' @import ggplot2 #' @importFrom tibble as_tibble rownames_to_column enframe #' @importFrom tidyr separate #' @importFrom stringr str_sub #' #' @examples #' \dontrun{manhattan_out <- mash_ggman_by_condition(m = m, saveoutput = TRUE)} #' #' @export mash_plot_manhattan_by_condition <- function(m, snp, cond = NA, saveoutput = FALSE, suffix = "", thresh = 0.05){ if(attr(snp, "class") != "bigSNP"){ stop("snp needs to be a bigSNP object, produced by the bigsnpr package.") } num_sig_in_cond <- c() if(is.na(cond)[1]){ cond <- get_colnames(m = m) } log10bf_df <- get_log10bf(m = m) %>% as.data.frame() %>% rownames_to_column(var = "value") %>% mutate(value = as.integer(.data$value)) %>% as_tibble() %>% left_join(get_marker_df(m = m, snp = snp), by = "value") %>% dplyr::rename(log10BayesFactor = .data$V1) %>% dplyr::select(-.data$value) %>% mutate(Num_Sig_Conditions = get_n_significant_conditions(m = m, thresh = thresh, conditions = cond) ) log10BF <- expression(paste("log"[10], plain("(Bayes Factor)"))) ggmanobject <- ggplot(data = log10bf_df, aes(x = .data$POS, y = .data$log10BayesFactor)) + geom_point(aes(color = .data$Num_Sig_Conditions, fill = .data$Num_Sig_Conditions, shape = as.factor(.data$CHR))) + facet_wrap(~ .data$CHR, nrow = 1, scales = "free_x", strip.position = "bottom") + scale_color_viridis_c(option = "B", end = 0.95) + scale_fill_viridis_c(option = "B", end = 0.95) + theme_classic() + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), panel.background = element_rect(fill=NA), legend.position = "right", axis.title = element_text(size = 10), axis.text = element_text(size = 10), axis.line.x = element_line(size = 0.35, colour = 'grey50'), axis.line.y = element_line(size = 0.35, colour = 'grey50'), axis.ticks = element_line(size = 0.25, colour = 'grey50'), legend.justification = c(1, 0.75), legend.key.size = unit(0.35, 'cm'), legend.title = element_blank(), legend.text = element_text(size = 9), legend.text.align = 0, legend.background = element_blank(), plot.subtitle = element_text(size = 10, vjust = 0), strip.background = element_blank(), strip.text = element_text(hjust = 0.5, size = 10 ,vjust = 0), strip.placement = 'outside', panel.spacing.x = unit(-0.05, 'cm')) + labs(x = "Chromosome", y = log10BF) + scale_x_continuous(expand = c(0.32, 0.32)) + scale_shape_manual(values = rep(c(21, 22, 23), 9), guide = FALSE) if(saveoutput == TRUE){ if(!(str_sub(suffix, end = 1) %in% c("", "_"))){ suffix <- paste0("_", suffix) } if(str_sub(suffix, end = 1) %in% c("")){ suffix <- paste0("_", get_date_filename()) } save_plot(paste0("Manhattan_mash", suffix, ".png"), plot = ggmanobject, base_aspect_ratio = 2.6, base_height = 3.3) } return(list(ggman_df = log10bf_df, ggmanobject = ggmanobject)) } #' @title Create a ggplot of pairwise sharing of mash effects #' #' @description Given a correlation matrix, an RDS with a correlation matrix, or #' a mash object, create a ggplot of pairwise sharing of mash effects using #' \code{\link{get_pairwise_sharing}} and \code{\link{ggcorr}}. #' #' @param m An object of type mash #' @param effectRDS An RDS containing a correlation matrix. #' @param corrmatrix A correlation matrix #' @param reorder Logical. Should the columns be reordered by similarity? #' @param saveoutput Logical. Should the output be saved to the path? #' @param suffix Character. Optional. A unique suffix used to save the files, #' instead of the current date & time. #' @param filename Character string with an output filename. Optional. #' @param ... Other arguments to \code{\link{get_pairwise_sharing}} or #' \code{\link{ggcorr}}. #' #' @importFrom GGally ggcorr #' @importFrom stringr str_replace_all str_sub #' @importFrom rlang .data #' #' @return A list containing a dataframe containing the correlations and a #' ggplot2 object containing the correlation plot. #' #' @export mash_plot_pairwise_sharing <- function(m = NULL, effectRDS = NULL, corrmatrix = NULL, reorder = TRUE, saveoutput = FALSE, filename = NA, suffix = "", ...){ # Additional arguments for get_pairwise_sharing, ggcorr, and save_plot requireNamespace("dots") factor <- dots::dots(name = 'factor', value = 0.5, ...) lfsr_thresh <- dots::dots(name = 'lfsr_thresh', value = 0.05, ...) FUN <- dots::dots(name = 'FUN', value = identity, ...) geom <- dots::dots(name = 'geom', value = 'circle', ...) label <- dots::dots(name = 'label', value = FALSE, ...) label_alpha <- dots::dots(name = 'label_alpha', value = TRUE, ...) label_size <- dots::dots(name = 'label_size', value = 3, ...) hjust <- dots::dots(name = 'hjust', value = 0.95, ...) vjust <- dots::dots(name = 'vjust', value = 0.3, ...) layout.exp <- dots::dots(name = 'layout.exp', value = 9, ...) min_size <- dots::dots(name = 'min_size', value = 0, ...) max_size <- dots::dots(name = 'max_size', value = 3.5, ...) option <- dots::dots(name = 'option', value = 'B', ...) dpi <- dots::dots(name = 'dpi', value = 500, ...) base_aspect_ratio <- dots::dots(name = 'base_aspect_ratio', value = 1.1, ...) if(is.na(filename)[1]){ if(!(str_sub(suffix, end = 1) %in% c("", "_"))){ suffix <- paste0("_", suffix) } if(str_sub(suffix, end = 1) %in% c("")){ suffix <- paste0("_", get_date_filename()) } filename <- paste0("Mash_pairwise_shared_effects", suffix, ".png") } # look for a shared effects matrix in the path, and if not, generate one if(!is.null(effectRDS) && is.null(m) && is.null(corrmatrix)){ shared_effects <- readRDS(effectRDS) } else if(!is.null(corrmatrix) && is.null(effectRDS) && is.null(m)){ shared_effects <- corrmatrix } else if(!is.null(m)){ shared_effects <- get_pairwise_sharing(m = m, factor = factor, lfsr_thresh = lfsr_thresh, FUN = FUN) rownames(shared_effects) <- str_replace_all(rownames(shared_effects), "(Stand_)??Bhat_?", "") %>% str_replace("-mean$", "") colnames(shared_effects) <- str_replace_all(colnames(shared_effects), "(Stand_)??Bhat_?", "") %>% str_replace("-mean$", "") } else { stop(paste0("Please specify one of these: ", "1. a mash output object (m), ", "2. the path to a effect rds file (mashRDS), ", "3. a correlation matrix (corrmatrix).")) } base_height <- dots::dots(name = 'base_height', value = nrow(shared_effects)*0.33+1, ...) if(reorder == TRUE){ corrdf <- reorder_cormat(cormat = shared_effects) corrplot <- ggcorr(data = NULL, cor_matrix = corrdf, geom = geom, label = label, label_alpha = label_alpha, label_size = label_size, hjust = hjust, vjust = vjust, layout.exp = layout.exp, min_size = min_size, max_size = max_size) + scale_color_viridis_c(option = option) + theme_classic() + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), panel.background = element_rect(fill=NA), axis.title = element_text(size = 10), axis.text = element_text(size = 10), axis.line.x = element_line(size = 0.35, colour = 'grey50'), axis.line.y = element_line(size = 0.35, colour = 'grey50'), axis.ticks = element_line(size = 0.25, colour = 'grey50'), legend.justification = c(1, 0.75), legend.key.size = unit(0.35, 'cm'), legend.title = element_blank(), legend.text = element_text(size = 9), legend.text.align = 0, legend.background = element_blank(), plot.subtitle = element_text(size = 10, vjust = 0), strip.background = element_blank(), strip.text = element_text(hjust = 0.5, size = 10 ,vjust = 0), strip.placement = 'outside', panel.spacing.x = unit(-0.1, 'cm')) } else { corrplot <- ggcorr(data = NULL, cor_matrix = shared_effects, geom = geom, label = label, label_alpha = label_alpha, label_size = label_size, hjust = hjust, vjust = vjust, layout.exp = layout.exp, min_size = min_size, max_size = max_size) + scale_color_viridis_c(option = option) + theme_classic() + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), panel.background = element_rect(fill=NA), axis.title = element_text(size = 10), axis.text = element_text(size = 10), axis.line.x = element_line(size = 0.35, colour = 'grey50'), axis.line.y = element_line(size = 0.35, colour = 'grey50'), axis.ticks = element_line(size = 0.25, colour = 'grey50'), legend.justification = c(1, 0.75), legend.key.size = unit(0.35, 'cm'), legend.title = element_blank(), legend.text = element_text(size = 9), legend.text.align = 0, legend.background = element_blank(), plot.subtitle = element_text(size = 10, vjust = 0), strip.background = element_blank(), strip.text = element_text(hjust = 0.5, size = 10 ,vjust = 0), strip.placement = 'outside', panel.spacing.x = unit(-0.1, 'cm')) } if(saveoutput == TRUE){ save_plot(filename = filename, corrplot, base_aspect_ratio = base_aspect_ratio, base_height = base_height, dpi = dpi) } return(list(corr_matrix = shared_effects, gg_corr = corrplot)) } #' @title Significant SNPs per number of conditions #' #' @description For some number of columns in a mash object that correspond to #' conditions, find the number of SNPs that are significant for that number #' of conditions. #' #' @param m An object of type mash #' @param conditions A vector of conditions. Get these with get_colnames(m). #' @param saveoutput Logical. Save plot output to a file? Default is FALSE. #' @param thresh What is the threshold to call an effect significant? Default #' is 0.05. #' @param suffix Character. Optional. A unique suffix used to save the files, #' instead of the current date & time. #' #' @return A list containing a dataframe of the number of SNPs significant per #' number of conditions, and a ggplot object using that dataframe. #' #' @import ggplot2 #' @importFrom tibble enframe #' @importFrom dplyr rename summarise filter group_by n #' @importFrom stringr str_sub #' #' @examples #' \dontrun{mash_plot_sig_by_condition(m = mash_obj, saveoutput = TRUE)} #' #' @export mash_plot_sig_by_condition <- function(m, conditions = NA, saveoutput = FALSE, suffix = "", thresh = 0.05){ thresh <- as.numeric(thresh) num_sig_in_cond <- c() if(typeof(conditions) == "logical"){ cond <- get_colnames(m) } else { cond <- conditions } SigHist <- get_n_significant_conditions(m = m, thresh = thresh, conditions = cond) %>% enframe(name = "Marker") %>% rename(Number_of_Conditions = .data$value) %>% group_by(.data$Number_of_Conditions) %>% summarise(Significant_SNPs = n()) %>% filter(.data$Number_of_Conditions != 0) vis <- ggplot(SigHist, aes(x = .data$Number_of_Conditions, y = .data$Significant_SNPs)) + geom_line() + geom_point() + geom_hline(yintercept = 0, lty = 2) + xlab(label = "Number of Conditions") + ylab(label = "Number of Significant SNPs")+ theme_classic() + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), panel.background = element_rect(fill=NA), legend.position = "none", axis.title = element_text(size = 10), axis.text = element_text(size = 10), axis.line.x = element_line(size = 0.35, colour = 'grey50'), axis.line.y = element_line(size = 0.35, colour = 'grey50'), axis.ticks = element_line(size = 0.25, colour = 'grey50'), legend.justification = c(1, 0.75), legend.key.size = unit(0.35, 'cm'), legend.title = element_blank(), legend.text = element_text(size = 9), legend.text.align = 0, legend.background = element_blank(), plot.subtitle = element_text(size = 10, vjust = 0), strip.background = element_blank(), strip.text = element_text(hjust = 0.5, size = 10 ,vjust = 0), strip.placement = 'outside', panel.spacing.x = unit(-0.1, 'cm')) if(saveoutput == TRUE){ if(!(str_sub(suffix, end = 1) %in% c("", "_"))){ suffix <- paste0("_", suffix) } if(str_sub(suffix, end = 1) %in% c("")){ suffix <- paste0("_", get_date_filename()) } ggsave(paste0("SNPs_significant_by_number_of_conditions", suffix, ".png"), width = 5, height = 3, units = "in", dpi = 400) } return(list(sighist = SigHist, ggobject = vis)) }