1163 lines
47 KiB
R
1163 lines
47 KiB
R
# --- Get Results from Mash Object with mashr get functions ---------
|
|
|
|
#' @title Get current date-time in a filename-appropriate format.
|
|
#'
|
|
#' @description Converts the current \code{Sys.time()} system time to a format
|
|
#' that is acceptable to include in a filename. Changes punctuation that
|
|
#' won't work in a filename.
|
|
#'
|
|
#' @return A string containing the current date-time with spaces and colons
|
|
#' replaced with underscores and periods, respectively.
|
|
#'
|
|
#' @importFrom stringr str_replace_all
|
|
get_date_filename <- function(){
|
|
str_replace_all(str_replace_all(Sys.time(), ":", "."), " ", "_")
|
|
}
|
|
|
|
|
|
#' Get number of conditions
|
|
#'
|
|
#' @param m The mash result
|
|
#'
|
|
#' @importFrom ashr get_pm
|
|
get_ncond = function(m){
|
|
return(ncol(get_pm(m)))
|
|
}
|
|
|
|
|
|
#' Return the Bayes Factor for each effect
|
|
#'
|
|
#' @param m the mash result (from joint or 1by1 analysis); must have been
|
|
#' computed using usepointmass = TRUE
|
|
#'
|
|
#' @return if m was fitted using usepointmass=TRUE then returns a vector of
|
|
#' the log10(bf) values for each effect. That is, the jth element
|
|
#' lbf_j is log10(Pr(Bj | g = ghat-nonnull)/Pr(Bj | g = 0)) where gha
|
|
#' t-nonnull is the non-null part of ghat. Otherwise returns NULL.
|
|
#'
|
|
get_log10bf = function(m) {
|
|
if(is.null(m$null_loglik)){
|
|
return(NULL)
|
|
} else {
|
|
return((m$alt_loglik - m$null_loglik)/log(10))
|
|
}
|
|
}
|
|
|
|
|
|
#' From a mash result, get effects that are significant in at least
|
|
#' one condition.
|
|
#'
|
|
#' @param m the mash result (from joint or 1by1 analysis)
|
|
#' @param thresh indicates the threshold below which to call signals significant
|
|
#' @param conditions which conditions to include in check (default to all)
|
|
#' @param sig_fn the significance function used to extract significance from
|
|
#' mash object; eg could be ashr::get_lfsr or ashr::get_lfdr. (Small values
|
|
#' must indicate significant.)
|
|
#'
|
|
#' @return a vector containing the indices of the significant effects, by
|
|
#' order of most significant to least
|
|
#'
|
|
#' @importFrom ashr get_lfsr
|
|
#'
|
|
#' @export
|
|
get_significant_results = function(m, thresh = 0.05, conditions = NULL,
|
|
sig_fn = ashr::get_lfsr) {
|
|
if (is.null(conditions)) {
|
|
conditions = 1:get_ncond(m)
|
|
}
|
|
top = apply(sig_fn(m)[,conditions,drop=FALSE],1,min) # find top effect
|
|
# in each condition
|
|
sig = which(top < thresh)
|
|
ord = order(top[sig],decreasing=FALSE)
|
|
sig[ord]
|
|
}
|
|
|
|
|
|
#' @title Get mash marker_df
|
|
#'
|
|
#' @description Pulls SNP markers information in the mash object from a bigsnp
|
|
#' object.
|
|
#'
|
|
#' @param m An object of type mash
|
|
#' @param snp A bigSNP object, produced by the bigsnpr package. Here, the WAMI
|
|
#' SNP information.
|
|
#'
|
|
#' @importFrom magrittr %>%
|
|
#' @importFrom rlang .data
|
|
#' @importFrom tibble enframe tibble
|
|
#' @importFrom dplyr arrange row_number mutate left_join
|
|
#'
|
|
get_marker_df <- function(m, snp){
|
|
if(attr(snp, "class") != "bigSNP"){
|
|
stop("snp needs to be a bigSNP object, produced by the bigsnpr package.")
|
|
}
|
|
markers <- tibble(CHR = snp$map$chromosome, POS = snp$map$physical.pos,
|
|
marker.ID = snp$map$marker.ID) %>%
|
|
mutate(CHRN = as.numeric(as.factor(.data$CHR)),
|
|
CHR = as.factor(.data$CHR),
|
|
value = row_number())
|
|
marker_df <- get_significant_results(m, thresh = 1) %>%
|
|
enframe(name = NULL, value = "value") %>%
|
|
arrange(.data$value) %>%
|
|
left_join(markers, by = "value")
|
|
|
|
return(marker_df)
|
|
}
|
|
|
|
#' @title Return the estimated mixture proportions. Use get_estimated_pi to
|
|
#' extract the estimates of the mixture proportions for different types of
|
|
#' covariance matrix. This tells you which covariance matrices have most of
|
|
#' the mass.
|
|
#'
|
|
#' @param m the mash result
|
|
#' @param dimension indicates whether you want the mixture proportions for the
|
|
#' covariances, grid, or all
|
|
#'
|
|
#' @return a named vector containing the estimated mixture proportions.
|
|
#'
|
|
#' @details If the fit was done with `usepointmass=TRUE` then the first
|
|
#' element of the returned vector will correspond to the null, and the
|
|
#' remaining elements to the non-null covariance matrices. Suppose the fit
|
|
#' was done with $K$ covariances and a grid of length $L$. If
|
|
#' `dimension=cov` then the returned vector will be of length $K$
|
|
#' (or $K+1$ if `usepointmass=TRUE`). If `dimension=grid` then
|
|
#' the returned vector will be of length $L$ (or $L+1$). If
|
|
#' `dimension=all` then the returned vector will be of length $LK$ (or
|
|
#' $LK+1$). The names of the vector will be informative for which
|
|
#' combination each element corresponds to.
|
|
#'
|
|
#' @importFrom ashr get_fitted_g
|
|
#'
|
|
get_estimated_pi = function(m, dimension = c("cov","grid","all")){
|
|
dimension = match.arg(dimension)
|
|
if(dimension=="all"){
|
|
get_estimated_pi_no_collapse(m)
|
|
} else {
|
|
g = get_fitted_g(m)
|
|
pihat = g$pi
|
|
pihat_names = NULL
|
|
pi_null = NULL
|
|
|
|
if(g$usepointmass){
|
|
pihat_names=c("null",pihat_names)
|
|
pi_null = pihat[1]
|
|
pihat = pihat[-1]
|
|
}
|
|
|
|
pihat = matrix(pihat,nrow=length(g$Ulist))
|
|
if(dimension=="cov"){
|
|
pihat = rowSums(pihat)
|
|
pihat_names = c(pihat_names,names(g$Ulist))
|
|
} else if(dimension=="grid"){
|
|
pihat = colSums(pihat)
|
|
pihat_names = c(pihat_names,1:length(g$grid))
|
|
}
|
|
|
|
pihat = c(pi_null,pihat)
|
|
names(pihat) = pihat_names
|
|
return(pihat)
|
|
}
|
|
}
|
|
|
|
get_estimated_pi_no_collapse = function(m){
|
|
g = get_fitted_g(m)
|
|
pihat = g$pi
|
|
names(pihat) = names(expand_cov(g$Ulist, g$grid, g$usepointmass))
|
|
pihat
|
|
}
|
|
|
|
#' @title Create expanded list of covariance matrices expanded by
|
|
#' grid, Sigma_{lk} = omega_l U_k
|
|
#'
|
|
#' @description This is an internal (non-exported) function. This help
|
|
#' page provides additional documentation mainly intended for
|
|
#' developers and expert users.
|
|
#'
|
|
#' @param Ulist a list of covarance matrices
|
|
#'
|
|
#' @param grid a grid of scalar values by which the covariance
|
|
#' matrices are to be sc
|
|
#'
|
|
#' @param usepointmass if TRUE adds a point mass at 0 (null component)
|
|
#' to the list
|
|
#'
|
|
#' @return This takes the covariance matrices in Ulist and multiplies
|
|
#' them by the grid values If usepointmass is TRUE then it adds a null
|
|
#' component.
|
|
#'
|
|
#' @keywords internal
|
|
#'
|
|
expand_cov = function(Ulist, grid, usepointmass = TRUE){
|
|
scaled_Ulist = scale_cov(Ulist, grid)
|
|
R = nrow(Ulist[[1]])
|
|
if(usepointmass){
|
|
scaled_Ulist = c(list(null=matrix(0,nrow=R,ncol=R)),scaled_Ulist)
|
|
}
|
|
return(scaled_Ulist)
|
|
}
|
|
|
|
#' @title Scale each covariance matrix in list Ulist by a scalar in
|
|
#' vector grid
|
|
#'
|
|
#' @description This is an internal (non-exported) function. This help
|
|
#' page provides additional documentation mainly intended for
|
|
#' developers and expert users.
|
|
#'
|
|
#' @param Ulist a list of matrices
|
|
#'
|
|
#' @param grid a vector of scaling factors (standard deviaions)
|
|
#'
|
|
#' @return a list with length length(Ulist)*length(grid)
|
|
#'
|
|
#' @keywords internal
|
|
#'
|
|
scale_cov = function(Ulist, grid){
|
|
orig_names = names(Ulist)
|
|
Ulist = unlist( lapply(grid^2, function(x){multiply_list(Ulist,x)}),
|
|
recursive = FALSE)
|
|
names(Ulist) = unlist( lapply(1:length(grid),
|
|
function(x){paste0(orig_names,".",x)}),
|
|
recursive = FALSE)
|
|
return(Ulist)
|
|
}
|
|
|
|
# Multiply each element of a list by scalar. (In our application each
|
|
# element of the list is a matrix.)
|
|
multiply_list = function(Ulist, x){lapply(Ulist, function(U){x*U})}
|
|
|
|
|
|
#' @title Get column names from a mash object
|
|
#'
|
|
#' @description This function extracts the column names from the local false
|
|
#' sign rate table of a mash object's results. This can tell you the condition
|
|
#' names or phenotype names used in the mash object. That can be useful for
|
|
#' looking at a subset of these columns, say.
|
|
#'
|
|
#' @param m An object of type mash
|
|
#'
|
|
#' @return A vector of phenotype names
|
|
#'
|
|
#' @examples
|
|
#' \dontrun{get_colnames(m = mash_obj)}
|
|
#'
|
|
get_colnames <- function(m){
|
|
column_names <- colnames(m$result$lfsr)
|
|
return(column_names)
|
|
}
|
|
|
|
get_Ulist <- function(m){
|
|
Ulist <- m$fitted_g$Ulist
|
|
return(Ulist)
|
|
}
|
|
|
|
#' Count number of conditions each effect is significant in
|
|
#'
|
|
#' @param m the mash result (from joint or 1by1 analysis)
|
|
#' @param thresh indicates the threshold below which to call signals significant
|
|
#' @param conditions which conditions to include in check (default to all)
|
|
#' @param sig_fn the significance function used to extract significance from mash object; eg could be ashr::get_lfsr or ashr::get_lfdr
|
|
#'
|
|
#' @return a vector containing the number of significant conditions
|
|
#'
|
|
get_n_significant_conditions = function(m, thresh = 0.05, conditions = NULL,
|
|
sig_fn = get_lfsr){
|
|
if (is.null(conditions)) {
|
|
conditions = 1:get_ncond(m)
|
|
}
|
|
return(apply(sig_fn(m)[,conditions,drop=FALSE] < thresh, 1, sum))
|
|
}
|
|
|
|
|
|
#' Compute the proportion of (significant) signals shared by magnitude in each pair of conditions, based on the poterior mean
|
|
#'
|
|
#' @param m the mash fit
|
|
#' @param factor a number between 0 and 1 - the factor within which effects are
|
|
#' considered to be shared.
|
|
#' @param lfsr_thresh the lfsr threshold for including an effect in the
|
|
#' assessment
|
|
#' @param FUN a function to be applied to the estimated effect sizes before
|
|
#' assessing sharing. The most obvious choice beside the default
|
|
#' 'FUN=identity' would be 'FUN=abs' if you want to ignore the sign of the
|
|
#' effects when assesing sharing.
|
|
#' @details For each pair of tissues, first identify the effects that are
|
|
#' significant (by lfsr<lfsr_thresh) in at least one of the two tissues.
|
|
#' Then compute what fraction of these have an estimated (posterior mean)
|
|
#' effect size within a factor `factor` of one another. The results are
|
|
#' returned as an R by R matrix.
|
|
#'
|
|
#' @examples
|
|
#' \dontrun{
|
|
#' get_pairwise_sharing(m) # sharing by magnitude (same sign)
|
|
#' get_pairwise_sharing(m, factor=0) # sharing by sign
|
|
#' get_pairwise_sharing(m, FUN=abs) # sharing by magnitude when sign is ignored
|
|
#' }
|
|
#'
|
|
#' @export
|
|
get_pairwise_sharing = function(m, factor = 0.5, lfsr_thresh = 0.05,
|
|
FUN = identity){
|
|
R = get_ncond(m)
|
|
#lfsr = get_lfsr(m)
|
|
S = matrix(NA, nrow = R, ncol = R)
|
|
for (i in 1:R) {
|
|
for (j in i:R) {
|
|
sig_i = get_significant_results(m, thresh = lfsr_thresh, conditions = i)
|
|
sig_j = get_significant_results(m, thresh = lfsr_thresh, conditions = j)
|
|
a = union(sig_i, sig_j)
|
|
ratio = FUN(get_pm(m)[a,i])/FUN(get_pm(m)[a,j]) ##divide effect sizes
|
|
S[i,j] = mean(ratio > factor & ratio < (1/factor))
|
|
}
|
|
}
|
|
S[lower.tri(S, diag = FALSE)] = t(S)[lower.tri(S, diag = FALSE)]
|
|
colnames(S) = row.names(S) = colnames(m$result$PosteriorMean)
|
|
|
|
return(S)
|
|
}
|
|
|
|
#' From a mash result, get effects that are significant in at least one condition
|
|
#'
|
|
#' @param m the mash result (from joint or 1by1 analysis)
|
|
#' @param thresh indicates the threshold below which to call signals significant
|
|
#' @param conditions which conditions to include in check (default to all)
|
|
#' @param sig_fn the significance function used to extract significance from mash object; eg could be ashr::get_lfsr or ashr::get_lfdr. (Small values must indicate significant.)
|
|
#'
|
|
#' @return a vector containing the indices of the significant effects, by order of most significant to least
|
|
#'
|
|
#' @importFrom ashr get_lfsr
|
|
#'
|
|
#' @export
|
|
get_significant_results = function(m, thresh = 0.05, conditions = NULL,
|
|
sig_fn = ashr::get_lfsr) {
|
|
if (is.null(conditions)) {
|
|
conditions = 1:get_ncond(m)
|
|
}
|
|
top = apply(sig_fn(m)[,conditions, drop = FALSE], 1, min) # find top effect in each condition
|
|
sig = which(top < thresh)
|
|
ord = order(top[sig], decreasing = FALSE)
|
|
sig[ord]
|
|
}
|
|
|
|
#' @title Get data frames of types of GxE from a mash object
|
|
#'
|
|
#' @description Performs set operations to determine pairwise GxE for effects
|
|
#' from a mash object.
|
|
#'
|
|
#' @param m An object of type mash
|
|
#' @param thresh Numeric. The threshold for including an effect in the assessment
|
|
#' @param factor a number between 0 and 1. The factor within which effects are
|
|
#' considered to be shared.
|
|
#'
|
|
#' @return A list containing eight data frames. Those with names that start
|
|
#' "S_" contain significant effects of different types between pairs of
|
|
#' named rows and columns. S_all_pairwise contains all significant effects;
|
|
#' NS_pairwise contains all non-significant effects. S_CN contains effects
|
|
#' significant in only one condition, and effects with a significantly
|
|
#' different magnitude (differential sensitivity). This dataframe is not
|
|
#' conservative using the local false sign rate test - we can't determine
|
|
#' the sign of one of the effects for effects significant in only one
|
|
#' condition - so it's not recommended to use this, but included. S_2_no
|
|
#' contains effects significant in both conditions that do not differ
|
|
#' significantly in magnitude. These effects do not have GxE. S_AP contains
|
|
#' effects significant in both conditions that differ in their sign - and
|
|
#' have antagonistic pleiotropy. S_DS contains effects significant in both
|
|
#' conditions that differ in the magnitude of their effect, but not their
|
|
#' sign - differentially sensitive alleles. S_1_row and S_1_col contain
|
|
#' effects that are significant in just one of the two conditions - the row
|
|
#' or the column, respectively.
|
|
#'
|
|
#' @importFrom dplyr between mutate filter
|
|
#' @importFrom tibble enframe
|
|
#' @importFrom magrittr %>%
|
|
#' @importFrom rlang .data
|
|
#'
|
|
#' @export
|
|
get_GxE = function(m, factor = 0.4, thresh = 0.05){
|
|
R = get_ncond(m) # Effects to consider
|
|
|
|
S_all = matrix(NA, nrow = R, ncol = R, dimnames = list(get_colnames(m),
|
|
get_colnames(m)))
|
|
S_2_no = matrix(NA, nrow = R, ncol = R, dimnames = list(get_colnames(m),
|
|
get_colnames(m)))
|
|
S_CN = matrix(NA, nrow = R, ncol = R, dimnames = list(get_colnames(m),
|
|
get_colnames(m)))
|
|
S_AP = matrix(NA, nrow = R, ncol = R, dimnames = list(get_colnames(m),
|
|
get_colnames(m)))
|
|
S_DS = matrix(NA, nrow = R, ncol = R, dimnames = list(get_colnames(m),
|
|
get_colnames(m)))
|
|
NS_pair = matrix(NA, nrow = R, ncol = R, dimnames = list(get_colnames(m),
|
|
get_colnames(m)))
|
|
S_i = matrix(NA, nrow = R, ncol = R, dimnames = list(get_colnames(m),
|
|
get_colnames(m)))
|
|
S_j = matrix(NA, nrow = R, ncol = R, dimnames = list(get_colnames(m),
|
|
get_colnames(m)))
|
|
|
|
for(i in 1:R){
|
|
for(j in 1:R){
|
|
|
|
if(i == j){
|
|
S_all[i, j] = length(get_significant_results(m, thresh = thresh,
|
|
conditions = i))
|
|
S_CN[i, j] = 0
|
|
# Not conservative!!
|
|
S_2_no[i, j] = length(get_significant_results(m, thresh = thresh,
|
|
conditions = i))
|
|
S_AP[i, j] = 0
|
|
S_DS[i, j] = 0
|
|
|
|
sig_i = get_significant_results(m, thresh = thresh, conditions = i)
|
|
all_i = get_significant_results(m, thresh = 1, conditions = i)
|
|
ns_i = dplyr::setdiff(all_i, sig_i) # effects that aren't sig in i
|
|
NS_pair[i, j] = length(ns_i)
|
|
S_i[i, j] = 0
|
|
S_j[i, j] = 0
|
|
|
|
} else {
|
|
|
|
sig_i = get_significant_results(m, thresh = thresh, conditions = i)
|
|
sig_j = get_significant_results(m, thresh = thresh, conditions = j)
|
|
|
|
all_i = get_significant_results(m, thresh = 1, conditions = i)
|
|
all_j = get_significant_results(m, thresh = 1, conditions = j)
|
|
|
|
ns_i = setdiff(all_i, sig_i) # effects that aren't sig in i
|
|
ns_j = setdiff(all_j, sig_j) # effects that aren't sig in j
|
|
|
|
# Markers where we aren't sure of the sign in either condition
|
|
# aka most of the effects
|
|
NS_pair[i,j] <- length(intersect(ns_i, ns_j))
|
|
|
|
# Markers where we are sure of the sign in just one condition
|
|
|
|
# Markers significant in i but not in j
|
|
ms_isigi = intersect(sig_i, ns_j)
|
|
|
|
# Markers significant in j but not in i
|
|
ms_jsigj = intersect(sig_j, ns_i)
|
|
|
|
# Markers where we are sure of the sign in both conditions
|
|
effects_df <- get_pm(m)[union(sig_i, sig_j), i] %>%
|
|
enframe(name = "Marker", value = "Effect_i") %>%
|
|
mutate(Effect_j = get_pm(m)[union(sig_i, sig_j), j],
|
|
ratio = .data$Effect_i/.data$Effect_j, ##divide effect sizes, if this ratio is positive there is not AP
|
|
APratio = .data$Effect_i/-.data$Effect_j) ##divide effect sizes, if this ratio is positive there is AP
|
|
|
|
## GxE: we are sure of the sign for two effects, and they are the same sign
|
|
# No GxE in this pair - effects are same sign and same mag
|
|
ms_sig2_noGxE <- effects_df %>%
|
|
filter(between(.data$ratio, factor, 1/factor))
|
|
|
|
# DS: we are sure of the sign for two effects, and they are the same sign
|
|
ms_sig2_DS <- effects_df %>%
|
|
filter(.data$ratio > 0 & !between(.data$ratio, factor, 1/factor))
|
|
|
|
## GxE we are sure of the sign for two effects, and they are opposite
|
|
# AP: we are sure of the sign for two effects, and they are opposite
|
|
ms_sig2_AP <- effects_df %>%
|
|
filter(between(.data$APratio, 0, 1E10))
|
|
|
|
S_all[i, j] = sum(length(ms_isigi), length(ms_jsigj), nrow(ms_sig2_noGxE),
|
|
nrow(ms_sig2_DS), nrow(ms_sig2_AP))
|
|
S_CN[i, j] = sum(length(ms_isigi), length(ms_jsigj), nrow(ms_sig2_DS))
|
|
# Not conservative!!
|
|
S_2_no[i, j] = sum(nrow(ms_sig2_noGxE))
|
|
S_AP[i, j] = sum(nrow(ms_sig2_AP))
|
|
S_DS[i, j] = sum(nrow(ms_sig2_DS))
|
|
S_i[i, j] = length(ms_isigi)
|
|
S_j[i, j] = length(ms_jsigj)
|
|
}
|
|
}
|
|
}
|
|
return(list(S_all_pairwise = S_all, S_CN = S_CN, S_2_no = S_2_no, S_AP = S_AP,
|
|
S_DS = S_DS, NS_pairwise = NS_pair, S_1_row = S_i,
|
|
S_1_col = S_j))
|
|
}
|
|
|
|
#' @title Get the positions of objects in a mash object Ulist that are above
|
|
#' some mass threshold.
|
|
#'
|
|
#' @description Get the positions of objects in a mash object Ulist that are
|
|
#' above some mass threshold.
|
|
#'
|
|
#' @param m An object of type mash
|
|
#' @param thresh Numeric. The mass threshold for including a covariance matrix
|
|
#'
|
|
#' @export
|
|
get_U_by_mass <- function(m, thresh = 0.05){
|
|
umass <- mash_plot_covar(m, saveoutput = FALSE)
|
|
mass_thresh <-
|
|
umass$covar_df$`Covariance Matrix`[which(umass$covar_df$Mass >= thresh)]
|
|
Ulist_order <- names(get_Ulist(m))
|
|
range <- which(Ulist_order %in% mass_thresh)
|
|
return(range)
|
|
}
|
|
|
|
|
|
#' @title Reorder correlation matrix
|
|
#'
|
|
#' @description Reorder correlation coefficients from a matrix of things
|
|
#' (including NA's) and hierarchically cluster them
|
|
#'
|
|
#' @param cormat A correlation matrix
|
|
#'
|
|
#' @importFrom cluster daisy
|
|
#' @importFrom stats hclust
|
|
#'
|
|
reorder_cormat <- function(cormat){
|
|
# Use correlation between variables as distance
|
|
dd <- daisy(cormat, metric = "gower")
|
|
hc <- hclust(dd)
|
|
cormat <- cormat[hc$order, hc$order]
|
|
}
|
|
|
|
|
|
# --- Plot & Save Plots ---------
|
|
|
|
#' ggplot of single mash effect
|
|
#'
|
|
#' @description Creates a plot with point estimates and standard errors for
|
|
#' effects of a single SNP in multiple conditions.
|
|
#'
|
|
#' @param m An object of type mash
|
|
#' @param snp A bigSNP object, produced by the bigsnpr package. Here,
|
|
#' the WAMI SNP information.
|
|
#' @param n Optional. Integer or integer vector. The result number to plot, in
|
|
#' order of significance. 1 would be the top result, for example. Find
|
|
#' these with \code{\link{get_significant_results}}.
|
|
#' @param i Optional. Integer or integer vector. The result number to plot, in
|
|
#' the order of the mash object. 1 would be the first marker in the mash
|
|
#' object, for example. Find these with \code{\link{get_marker_df}}.
|
|
#' @param marker Optional. Print the marker name on the plot?
|
|
#' @param saveoutput Logical. Should the output be saved to the path?
|
|
#' @param suffix Character. Optional. A unique suffix used to save the files,
|
|
#' instead of the current date & time.
|
|
#'
|
|
#' @note Specify only one of n or i.
|
|
#'
|
|
#' @importFrom ashr get_psd
|
|
#' @importFrom cowplot save_plot
|
|
#' @importFrom tibble enframe
|
|
#' @importFrom dplyr mutate case_when
|
|
#' @importFrom tidyr separate
|
|
#' @import ggplot2
|
|
#' @importFrom purrr as_vector
|
|
#' @importFrom stringr str_replace str_replace_all str_sub str_length
|
|
#'
|
|
#' @export
|
|
mash_plot_marker_effect <- function(m, snp = snp, n = NA, i = NA, marker = TRUE,
|
|
saveoutput = FALSE, suffix = ""){
|
|
stopifnot((typeof(n) %in% c("double", "integer") | typeof(i) %in% c("double",
|
|
"integer")))
|
|
if(attr(snp, "class") != "bigSNP"){
|
|
stop("snp needs to be a bigSNP object, produced by the bigsnpr package.")
|
|
}
|
|
if(typeof(n) != "logical"){
|
|
i <- get_significant_results(m)[n]
|
|
marker_df <- tibble(CHR = snp$map$chromosome[i],
|
|
POS = snp$map$physical.pos[i],
|
|
marker.ID = snp$map$marker.ID[i])
|
|
marker_name <- marker_df$marker.ID
|
|
} else {
|
|
marker_df <- get_marker_df(m, snp)[i,]
|
|
marker_name <- marker_df$marker.ID
|
|
}
|
|
effectplot <- get_colnames(m) %>%
|
|
enframe(name = NULL, value = "Conditions") %>%
|
|
mutate(mn = get_pm(m)[i,],
|
|
se = get_psd(m)[i,]) %>%
|
|
mutate(Conditions = str_replace(.data$Conditions,
|
|
"(Stand_)??Bhat_?",
|
|
""),
|
|
Conditions = str_replace(.data$Conditions, "-mean$", "")
|
|
)
|
|
|
|
ggobject <- ggplot(data = effectplot) +
|
|
geom_point(mapping = aes(x = as.factor(.data$Conditions), y = .data$mn)) +
|
|
geom_errorbar(mapping = aes(ymin = .data$mn - .data$se,
|
|
ymax = .data$mn + .data$se,
|
|
x = .data$Conditions), width = 0.3) +
|
|
geom_hline(yintercept = 0, lty = 2) +
|
|
labs(x = "Conditions", y = "Effect Size") +
|
|
theme_classic() +
|
|
theme(axis.text.x = element_text(size = 10, angle = 45, vjust = 1, hjust = 1),
|
|
panel.background = element_rect(fill=NA),
|
|
legend.position = "none",
|
|
axis.title = element_text(size = 10),
|
|
axis.text.y = element_text(size = 10),
|
|
axis.line.x = element_line(size = 0.35, colour = 'grey50'),
|
|
axis.line.y = element_line(size = 0.35, colour = 'grey50'),
|
|
axis.ticks = element_line(size = 0.25, colour = 'grey50'),
|
|
legend.justification = c(1, 0.75),
|
|
legend.key.size = unit(0.35, 'cm'),
|
|
legend.title = element_blank(),
|
|
legend.text = element_text(size = 9),
|
|
legend.text.align = 0, legend.background = element_blank(),
|
|
plot.subtitle = element_text(size = 10, vjust = 0),
|
|
strip.background = element_blank(),
|
|
strip.text = element_text(hjust = 0.5, size = 10, vjust = 0),
|
|
strip.placement = 'outside', panel.spacing.x = unit(-0., 'cm'))
|
|
if(str_length(effectplot$Conditions[1]) > 10){
|
|
ggobject <- ggobject +
|
|
theme(plot.margin = unit(c(0.2, 0.2, 0.5, 1.5), 'cm'),)
|
|
}
|
|
if(marker == TRUE){
|
|
ggobject <- ggobject + ggtitle(label = marker_name)
|
|
}
|
|
if(saveoutput == TRUE){
|
|
if(!(str_sub(suffix, end = 1) %in% c("", "_"))){
|
|
suffix <- paste0("_", suffix)
|
|
}
|
|
if(str_sub(suffix, end = 1) %in% c("")){
|
|
suffix <- paste0("_", get_date_filename())
|
|
}
|
|
save_plot(filename = paste0("Effect_plot_", str_replace_all(marker_name,
|
|
" ", "_"),
|
|
suffix, ".png"),
|
|
plot = ggobject, base_aspect_ratio = nrow(effectplot)*0.1 + 0.1,
|
|
base_height = 3.5)
|
|
}
|
|
return(list(marker = marker_name, effect_df = effectplot,
|
|
ggobject = ggobject))
|
|
}
|
|
|
|
#' ggplot of covariance matrix masses
|
|
#'
|
|
#' @description Creates a bar plot using ggplot of the masses that are on each
|
|
#' covariance matrix specified in the mash model.
|
|
#'
|
|
#' @param m An object of type mash
|
|
#' @param saveoutput Logical. Should the output be saved to the path?
|
|
#' @param suffix Character. Optional. A unique suffix used to save the files,
|
|
#' instead of the current date & time.
|
|
#'
|
|
#' @importFrom cowplot save_plot
|
|
#' @importFrom tibble enframe
|
|
#' @importFrom dplyr mutate arrange desc
|
|
#' @importFrom stringr str_replace str_sub
|
|
#' @import ggplot2
|
|
#'
|
|
#' @note This plot can be useful for seeing the overall patterns of effects in
|
|
#' the data used in mash. Non-significant effects will add mass to the
|
|
#' "no_effects" covariance matrix, while significant effects will add mass
|
|
#' to one of the other covariance matrices. You can use mash_plot_Ulist()
|
|
#' to plot the covariance matrix patterns themselves.
|
|
#'
|
|
#' @export
|
|
mash_plot_covar <- function(m, saveoutput = FALSE, suffix = ""){
|
|
df <- get_estimated_pi(m)
|
|
df <- enframe(df, name = "Covariance Matrix", value = "Mass") %>%
|
|
mutate(`Covariance Matrix` = str_replace(.data$`Covariance Matrix`,
|
|
"(Stand_)??Bhat_?",
|
|
"single_effect_"),
|
|
`Covariance Matrix` = str_replace(.data$`Covariance Matrix`,
|
|
"^null$", "no_effects")) %>%
|
|
arrange(desc(.data$`Covariance Matrix`))
|
|
df$`Covariance Matrix` <- factor(df$`Covariance Matrix`,
|
|
levels = (df$`Covariance Matrix`))
|
|
ggobject <- ggplot(df) +
|
|
geom_bar(aes(x = .data$Mass, y = .data$`Covariance Matrix`),
|
|
stat = "identity") +
|
|
theme_classic() +
|
|
theme(axis.text.x = element_blank(),
|
|
axis.ticks.x = element_blank(),
|
|
panel.background = element_rect(fill=NA),
|
|
axis.title = element_text(size = 10),
|
|
axis.text = element_text(size = 10),
|
|
axis.line.x = element_line(size = 0.35, colour = 'grey50'),
|
|
axis.line.y = element_line(size = 0.35, colour = 'grey50'),
|
|
axis.ticks = element_line(size = 0.25, colour = 'grey50'),
|
|
legend.justification = c(1, 0.75),
|
|
legend.key.size = unit(0.35, 'cm'),
|
|
legend.title = element_blank(),
|
|
legend.text = element_text(size = 9),
|
|
legend.text.align = 0, legend.background = element_blank(),
|
|
plot.subtitle = element_text(size = 10, vjust = 0),
|
|
strip.background = element_blank(),
|
|
strip.text = element_text(hjust = 0.5, size = 10 ,vjust = 0),
|
|
strip.placement = 'outside', panel.spacing.x = unit(-0.1, 'cm'))
|
|
|
|
if(saveoutput == TRUE){
|
|
if(!(str_sub(suffix, end = 1) %in% c("", "_"))){
|
|
suffix <- paste0("_", suffix)
|
|
}
|
|
if(str_sub(suffix, end = 1) %in% c("")){
|
|
suffix <- paste0("_", get_date_filename())
|
|
}
|
|
save_plot(paste0("Covariance_matrix_mass_plot", suffix,
|
|
".png"), plot = ggobject, base_aspect_ratio = 1,
|
|
base_height = nrow(df)*.15)
|
|
}
|
|
return(list(covar_df = df, ggobject = ggobject))
|
|
}
|
|
|
|
#' ggplot of specific covariance matrix patterns
|
|
#'
|
|
#' @description Creates a tile plot using ggplot of the covariance matrices
|
|
#' specified in the mash model.
|
|
#'
|
|
#' @param m An object of type mash
|
|
#' @param range Numeric vector. Which covariance matrices should be plotted?
|
|
#' @param saveoutput Logical. Should the output be saved to the path?
|
|
#' @param suffix Character. Optional. A unique suffix used to save the files,
|
|
#' instead of the current date & time.
|
|
#' @param limits Logical. Should there be plot limits of -1 and 1? Default is TRUE.
|
|
#' @param labels Logical. Should each pairwise comparison proportion have a label? Default is TRUE.
|
|
#'
|
|
#' @return A list of dataframes used to make the tile plots and the plots
|
|
#' themselves.
|
|
#'
|
|
#' @importFrom cowplot save_plot
|
|
#' @importFrom tibble enframe as_tibble
|
|
#' @importFrom dplyr mutate filter
|
|
#' @importFrom rlist list.append
|
|
#' @importFrom rlang .data
|
|
#' @importFrom stringr str_replace str_sub
|
|
#' @importFrom tidyr pivot_longer
|
|
#' @import ggplot2
|
|
#'
|
|
#' @export
|
|
mash_plot_Ulist <- function(m, range = NA, saveoutput = FALSE, suffix = "",
|
|
limits = TRUE, labels = TRUE){
|
|
Ulist <- get_Ulist(m)
|
|
Ureturn <- list()
|
|
stopifnot(typeof(range) %in% c("double", "integer", "logical"))
|
|
if(typeof(range) == "logical"){
|
|
range <- seq_along(Ulist)
|
|
}
|
|
|
|
for(u in range){
|
|
U1 <- Ulist[[u]]
|
|
# Remove half the tiles if the matrix is symmetric
|
|
if(isSymmetric(U1)){
|
|
for(i in 1:nrow(U1)){
|
|
for(j in 1:ncol(U1)){
|
|
if(i < j){
|
|
U1[i, j] <- NA
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if(ncol(U1) == length(get_colnames(m))){
|
|
colnames(U1) <- str_replace(get_colnames(m), "(Stand_)??Bhat_?", "") %>%
|
|
str_replace("-mean$", "")
|
|
name_order <- str_replace(get_colnames(m), "(Stand_)??Bhat_?", "") %>%
|
|
str_replace("-mean$", "")
|
|
U1 <- as_tibble(U1, .name_repair = "unique") %>%
|
|
mutate(rowU = str_replace(get_colnames(m), "(Stand_)??Bhat_?", ""),
|
|
rowU = str_replace(.data$rowU, "-mean$", "")) %>%
|
|
pivot_longer(cols = -.data$rowU, names_to = "colU",
|
|
values_to = "covar") %>%
|
|
filter(!is.na(.data$covar))
|
|
U1$rowU <- factor(U1$rowU, levels = name_order)
|
|
U1$colU <- factor(U1$colU, levels = name_order)
|
|
} else {
|
|
U1 <- as_tibble(U1, rownames = "rowU", .name_repair = "unique") %>%
|
|
pivot_longer(cols = -.data$rowU, names_to = "colU",
|
|
values_to = "covar") %>%
|
|
mutate(rowU = paste0("Condition", .data$rowU),
|
|
colU = str_replace(.data$colU, "\\.\\.\\.", "Condition")) %>%
|
|
filter(!is.na(.data$covar))
|
|
}
|
|
|
|
if(limits == TRUE){
|
|
U1_covar <- U1 %>%
|
|
ggplot(aes(x = .data$rowU, y = .data$colU)) +
|
|
geom_tile(aes(fill = .data$covar), na.rm = TRUE) +
|
|
scale_fill_gradientn(colors = c("#440154FF", "#3B528BFF", "#2C728EFF",
|
|
"white", "#27AD81FF", "#5DC863FF",
|
|
"#FDE725FF"), limits = c(-1,1)) +
|
|
#geom_text(aes(label = round(.data$covar, 1)), color = "darkgrey") +
|
|
theme(legend.position = "right",
|
|
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
|
|
xlab("") + ylab("") + ggtitle(names(Ulist)[u])
|
|
} else {
|
|
lim_lower <- min(U1$covar)
|
|
lim_upper <- max(U1$covar) + max(U1$covar) * 0.01
|
|
U1_covar <- U1 %>%
|
|
ggplot(aes(x = .data$rowU, y = .data$colU)) +
|
|
geom_tile(aes(fill = .data$covar), na.rm = TRUE) +
|
|
scale_fill_gradientn(colors = c("#440154FF", "#472D7BFF", "#3B528BFF",
|
|
"#2C728EFF", "#21908CFF", "#27AD81FF",
|
|
"#5DC863FF", "#AADC32FF", "#FDE725FF"),
|
|
limits = c(lim_lower, lim_upper)) +
|
|
#geom_text(aes(label = round(.data$covar, 1)), color = "darkgrey") +
|
|
theme_classic() +
|
|
theme(panel.background = element_rect(fill=NA),
|
|
legend.position = "right",
|
|
axis.title = element_text(size = 10),
|
|
axis.text.y = element_text(size = 10),
|
|
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
|
|
axis.line.x = element_line(size = 0.35, colour = 'grey50'),
|
|
axis.line.y = element_line(size = 0.35, colour = 'grey50'),
|
|
axis.ticks = element_line(size = 0.25, colour = 'grey50'),
|
|
legend.justification = c(1, 0.75),
|
|
legend.key.size = unit(0.35, 'cm'),
|
|
legend.title = element_blank(),
|
|
legend.text = element_text(size = 9),
|
|
legend.text.align = 0, legend.background = element_blank(),
|
|
plot.subtitle = element_text(size = 10, vjust = 0),
|
|
strip.background = element_blank(),
|
|
strip.text = element_text(hjust = 0.5, size = 10 ,vjust = 0),
|
|
strip.placement = 'outside', panel.spacing.x = unit(-0.1, 'cm')) +
|
|
xlab("") + ylab("") + ggtitle(names(Ulist)[u])
|
|
}
|
|
if (labels == TRUE) {
|
|
U1_covar <- U1_covar +
|
|
geom_text(aes(label = round(.data$covar, 1)), color = "darkgrey")
|
|
}
|
|
|
|
Ureturn <- list.append(Ureturn, U1 = U1,
|
|
ggobject = U1_covar)
|
|
names(Ureturn)[-2] <- paste0(names(Ulist)[u], "_df")
|
|
names(Ureturn)[-1] <- paste0(names(Ulist)[u], "_ggobject")
|
|
|
|
if(saveoutput == TRUE){
|
|
if(!(str_sub(suffix, end = 1) %in% c("", "_"))){
|
|
suffix <- paste0("_", suffix)
|
|
}
|
|
if(str_sub(suffix, end = 1) %in% c("")){
|
|
suffix <- paste0("_", get_date_filename())
|
|
}
|
|
save_plot(paste0("Covariances_plot_", names(Ulist)[u], suffix, ".png"),
|
|
plot = U1_covar,
|
|
base_height = length(get_colnames(m))*.18 + 1.2,
|
|
base_asp = 1.35)
|
|
}
|
|
|
|
}
|
|
return(Ureturn)
|
|
}
|
|
|
|
#' @title Manhattan plot in ggplot colored by significant conditions
|
|
#'
|
|
#' @description Takes a mash object and, for some vector of phenotypes, returns
|
|
#' a Manhattan plot ggplot object (and its dataframe). Each SNP in the plot
|
|
#' is colored by the number of phenotypes it is significant for. Even and
|
|
#' odd chromosomes have different shapes for their SNPs, so that
|
|
#' chromosome identity can be determined.
|
|
#'
|
|
#' @param m A mash object (outputted by mash).
|
|
#' @param snp A bigSNP object, produced by the bigsnpr package. Here,
|
|
#' the WAMI SNP information.
|
|
#' @param cond A vector of phenotypes. Defaults to the names of each
|
|
#' column in the mash object.
|
|
#' @param saveoutput Logical. Should the output be saved to the path?
|
|
#' @param suffix Character. Optional. A unique suffix used to save the files,
|
|
#' instead of the current date & time.
|
|
#' @param thresh Numeric. The threshold used for the local false sign rate to
|
|
#' call significance in a condition.
|
|
#'
|
|
#' @return A \code{tbl_df()} of the data used to make the Manhattan plot, and a
|
|
#' ggplot object containing the Manhattan.
|
|
#'
|
|
#' @importFrom cowplot save_plot
|
|
#' @importFrom dplyr rename select arrange mutate left_join
|
|
#' @import ggplot2
|
|
#' @importFrom tibble as_tibble rownames_to_column enframe
|
|
#' @importFrom tidyr separate
|
|
#' @importFrom stringr str_sub
|
|
#'
|
|
#' @examples
|
|
#' \dontrun{manhattan_out <- mash_ggman_by_condition(m = m, saveoutput = TRUE)}
|
|
#'
|
|
#' @export
|
|
mash_plot_manhattan_by_condition <- function(m, snp, cond = NA,
|
|
saveoutput = FALSE,
|
|
suffix = "", thresh = 0.05){
|
|
if(attr(snp, "class") != "bigSNP"){
|
|
stop("snp needs to be a bigSNP object, produced by the bigsnpr package.")
|
|
}
|
|
num_sig_in_cond <- c()
|
|
|
|
if(is.na(cond)[1]){
|
|
cond <- get_colnames(m = m)
|
|
}
|
|
|
|
log10bf_df <- get_log10bf(m = m) %>%
|
|
as.data.frame() %>%
|
|
rownames_to_column(var = "value") %>%
|
|
mutate(value = as.integer(.data$value)) %>%
|
|
as_tibble() %>%
|
|
left_join(get_marker_df(m = m, snp = snp), by = "value") %>%
|
|
dplyr::rename(log10BayesFactor = .data$V1) %>%
|
|
dplyr::select(-.data$value) %>%
|
|
mutate(Num_Sig_Conditions = get_n_significant_conditions(m = m,
|
|
thresh = thresh,
|
|
conditions = cond) )
|
|
|
|
log10BF <- expression(paste("log"[10], plain("(Bayes Factor)")))
|
|
|
|
ggmanobject <- ggplot(data = log10bf_df, aes(x = .data$POS,
|
|
y = .data$log10BayesFactor)) +
|
|
geom_point(aes(color = .data$Num_Sig_Conditions, fill = .data$Num_Sig_Conditions,
|
|
shape = as.factor(.data$CHR))) +
|
|
facet_wrap(~ .data$CHR, nrow = 1, scales = "free_x", strip.position = "bottom") +
|
|
scale_color_viridis_c(option = "B", end = 0.95) +
|
|
scale_fill_viridis_c(option = "B", end = 0.95) +
|
|
theme_classic() +
|
|
theme(axis.text.x = element_blank(),
|
|
axis.ticks.x = element_blank(),
|
|
panel.background = element_rect(fill=NA),
|
|
legend.position = "right",
|
|
axis.title = element_text(size = 10),
|
|
axis.text = element_text(size = 10),
|
|
axis.line.x = element_line(size = 0.35, colour = 'grey50'),
|
|
axis.line.y = element_line(size = 0.35, colour = 'grey50'),
|
|
axis.ticks = element_line(size = 0.25, colour = 'grey50'),
|
|
legend.justification = c(1, 0.75),
|
|
legend.key.size = unit(0.35, 'cm'),
|
|
legend.title = element_blank(),
|
|
legend.text = element_text(size = 9),
|
|
legend.text.align = 0, legend.background = element_blank(),
|
|
plot.subtitle = element_text(size = 10, vjust = 0),
|
|
strip.background = element_blank(),
|
|
strip.text = element_text(hjust = 0.5, size = 10 ,vjust = 0),
|
|
strip.placement = 'outside', panel.spacing.x = unit(-0.05, 'cm')) +
|
|
labs(x = "Chromosome", y = log10BF) +
|
|
scale_x_continuous(expand = c(0.32, 0.32)) +
|
|
scale_shape_manual(values = rep(c(21, 22, 23), 9), guide = FALSE)
|
|
|
|
if(saveoutput == TRUE){
|
|
if(!(str_sub(suffix, end = 1) %in% c("", "_"))){
|
|
suffix <- paste0("_", suffix)
|
|
}
|
|
if(str_sub(suffix, end = 1) %in% c("")){
|
|
suffix <- paste0("_", get_date_filename())
|
|
}
|
|
save_plot(paste0("Manhattan_mash", suffix,
|
|
".png"), plot = ggmanobject, base_aspect_ratio = 2.6,
|
|
base_height = 3.3)
|
|
}
|
|
|
|
return(list(ggman_df = log10bf_df, ggmanobject = ggmanobject))
|
|
}
|
|
|
|
|
|
#' @title Create a ggplot of pairwise sharing of mash effects
|
|
#'
|
|
#' @description Given a correlation matrix, an RDS with a correlation matrix, or
|
|
#' a mash object, create a ggplot of pairwise sharing of mash effects using
|
|
#' \code{\link{get_pairwise_sharing}} and \code{\link{ggcorr}}.
|
|
#'
|
|
#' @param m An object of type mash
|
|
#' @param effectRDS An RDS containing a correlation matrix.
|
|
#' @param corrmatrix A correlation matrix
|
|
#' @param reorder Logical. Should the columns be reordered by similarity?
|
|
#' @param saveoutput Logical. Should the output be saved to the path?
|
|
#' @param suffix Character. Optional. A unique suffix used to save the files,
|
|
#' instead of the current date & time.
|
|
#' @param filename Character string with an output filename. Optional.
|
|
#' @param ... Other arguments to \code{\link{get_pairwise_sharing}} or
|
|
#' \code{\link{ggcorr}}.
|
|
#'
|
|
#' @importFrom GGally ggcorr
|
|
#' @importFrom stringr str_replace_all str_sub
|
|
#' @importFrom rlang .data
|
|
#'
|
|
#' @return A list containing a dataframe containing the correlations and a
|
|
#' ggplot2 object containing the correlation plot.
|
|
#'
|
|
#' @export
|
|
mash_plot_pairwise_sharing <- function(m = NULL, effectRDS = NULL,
|
|
corrmatrix = NULL, reorder = TRUE,
|
|
saveoutput = FALSE, filename = NA,
|
|
suffix = "", ...){
|
|
# Additional arguments for get_pairwise_sharing, ggcorr, and save_plot
|
|
requireNamespace("dots")
|
|
factor <- dots::dots(name = 'factor', value = 0.5, ...)
|
|
lfsr_thresh <- dots::dots(name = 'lfsr_thresh', value = 0.05, ...)
|
|
FUN <- dots::dots(name = 'FUN', value = identity, ...)
|
|
geom <- dots::dots(name = 'geom', value = 'circle', ...)
|
|
label <- dots::dots(name = 'label', value = FALSE, ...)
|
|
label_alpha <- dots::dots(name = 'label_alpha', value = TRUE, ...)
|
|
label_size <- dots::dots(name = 'label_size', value = 3, ...)
|
|
hjust <- dots::dots(name = 'hjust', value = 0.95, ...)
|
|
vjust <- dots::dots(name = 'vjust', value = 0.3, ...)
|
|
layout.exp <- dots::dots(name = 'layout.exp', value = 9, ...)
|
|
min_size <- dots::dots(name = 'min_size', value = 0, ...)
|
|
max_size <- dots::dots(name = 'max_size', value = 3.5, ...)
|
|
option <- dots::dots(name = 'option', value = 'B', ...)
|
|
dpi <- dots::dots(name = 'dpi', value = 500, ...)
|
|
|
|
base_aspect_ratio <- dots::dots(name = 'base_aspect_ratio', value = 1.1, ...)
|
|
|
|
if(is.na(filename)[1]){
|
|
if(!(str_sub(suffix, end = 1) %in% c("", "_"))){
|
|
suffix <- paste0("_", suffix)
|
|
}
|
|
if(str_sub(suffix, end = 1) %in% c("")){
|
|
suffix <- paste0("_", get_date_filename())
|
|
}
|
|
filename <- paste0("Mash_pairwise_shared_effects", suffix, ".png")
|
|
}
|
|
|
|
# look for a shared effects matrix in the path, and if not, generate one
|
|
if(!is.null(effectRDS) && is.null(m) && is.null(corrmatrix)){
|
|
shared_effects <- readRDS(effectRDS)
|
|
} else if(!is.null(corrmatrix) && is.null(effectRDS) && is.null(m)){
|
|
shared_effects <- corrmatrix
|
|
} else if(!is.null(m)){
|
|
shared_effects <- get_pairwise_sharing(m = m, factor = factor,
|
|
lfsr_thresh = lfsr_thresh, FUN = FUN)
|
|
rownames(shared_effects) <- str_replace_all(rownames(shared_effects),
|
|
"(Stand_)??Bhat_?", "") %>%
|
|
str_replace("-mean$", "")
|
|
colnames(shared_effects) <- str_replace_all(colnames(shared_effects),
|
|
"(Stand_)??Bhat_?", "") %>%
|
|
str_replace("-mean$", "")
|
|
} else {
|
|
stop(paste0("Please specify one of these: ",
|
|
"1. a mash output object (m), ",
|
|
"2. the path to a effect rds file (mashRDS), ",
|
|
"3. a correlation matrix (corrmatrix)."))
|
|
}
|
|
|
|
base_height <- dots::dots(name = 'base_height',
|
|
value = nrow(shared_effects)*0.33+1, ...)
|
|
|
|
if(reorder == TRUE){
|
|
corrdf <- reorder_cormat(cormat = shared_effects)
|
|
corrplot <- ggcorr(data = NULL, cor_matrix = corrdf, geom = geom,
|
|
label = label, label_alpha = label_alpha,
|
|
label_size = label_size, hjust = hjust, vjust = vjust,
|
|
layout.exp = layout.exp, min_size = min_size,
|
|
max_size = max_size) +
|
|
scale_color_viridis_c(option = option) +
|
|
theme_classic() +
|
|
theme(axis.text.x = element_blank(),
|
|
axis.ticks.x = element_blank(),
|
|
panel.background = element_rect(fill=NA),
|
|
axis.title = element_text(size = 10),
|
|
axis.text = element_text(size = 10),
|
|
axis.line.x = element_line(size = 0.35, colour = 'grey50'),
|
|
axis.line.y = element_line(size = 0.35, colour = 'grey50'),
|
|
axis.ticks = element_line(size = 0.25, colour = 'grey50'),
|
|
legend.justification = c(1, 0.75),
|
|
legend.key.size = unit(0.35, 'cm'),
|
|
legend.title = element_blank(),
|
|
legend.text = element_text(size = 9),
|
|
legend.text.align = 0, legend.background = element_blank(),
|
|
plot.subtitle = element_text(size = 10, vjust = 0),
|
|
strip.background = element_blank(),
|
|
strip.text = element_text(hjust = 0.5, size = 10 ,vjust = 0),
|
|
strip.placement = 'outside', panel.spacing.x = unit(-0.1, 'cm'))
|
|
} else {
|
|
corrplot <- ggcorr(data = NULL, cor_matrix = shared_effects, geom = geom,
|
|
label = label, label_alpha = label_alpha,
|
|
label_size = label_size, hjust = hjust, vjust = vjust,
|
|
layout.exp = layout.exp, min_size = min_size,
|
|
max_size = max_size) +
|
|
scale_color_viridis_c(option = option) +
|
|
theme_classic() +
|
|
theme(axis.text.x = element_blank(),
|
|
axis.ticks.x = element_blank(),
|
|
panel.background = element_rect(fill=NA),
|
|
axis.title = element_text(size = 10),
|
|
axis.text = element_text(size = 10),
|
|
axis.line.x = element_line(size = 0.35, colour = 'grey50'),
|
|
axis.line.y = element_line(size = 0.35, colour = 'grey50'),
|
|
axis.ticks = element_line(size = 0.25, colour = 'grey50'),
|
|
legend.justification = c(1, 0.75),
|
|
legend.key.size = unit(0.35, 'cm'),
|
|
legend.title = element_blank(),
|
|
legend.text = element_text(size = 9),
|
|
legend.text.align = 0, legend.background = element_blank(),
|
|
plot.subtitle = element_text(size = 10, vjust = 0),
|
|
strip.background = element_blank(),
|
|
strip.text = element_text(hjust = 0.5, size = 10 ,vjust = 0),
|
|
strip.placement = 'outside', panel.spacing.x = unit(-0.1, 'cm'))
|
|
}
|
|
|
|
if(saveoutput == TRUE){
|
|
save_plot(filename = filename, corrplot,
|
|
base_aspect_ratio = base_aspect_ratio, base_height = base_height,
|
|
dpi = dpi)
|
|
}
|
|
return(list(corr_matrix = shared_effects, gg_corr = corrplot))
|
|
}
|
|
|
|
#' @title Significant SNPs per number of conditions
|
|
#'
|
|
#' @description For some number of columns in a mash object that correspond to
|
|
#' conditions, find the number of SNPs that are significant for that number
|
|
#' of conditions.
|
|
#'
|
|
#' @param m An object of type mash
|
|
#' @param conditions A vector of conditions. Get these with get_colnames(m).
|
|
#' @param saveoutput Logical. Save plot output to a file? Default is FALSE.
|
|
#' @param thresh What is the threshold to call an effect significant? Default
|
|
#' is 0.05.
|
|
#' @param suffix Character. Optional. A unique suffix used to save the files,
|
|
#' instead of the current date & time.
|
|
#'
|
|
#' @return A list containing a dataframe of the number of SNPs significant per
|
|
#' number of conditions, and a ggplot object using that dataframe.
|
|
#'
|
|
#' @import ggplot2
|
|
#' @importFrom tibble enframe
|
|
#' @importFrom dplyr rename summarise filter group_by n
|
|
#' @importFrom stringr str_sub
|
|
#'
|
|
#' @examples
|
|
#' \dontrun{mash_plot_sig_by_condition(m = mash_obj, saveoutput = TRUE)}
|
|
#'
|
|
#' @export
|
|
mash_plot_sig_by_condition <- function(m, conditions = NA, saveoutput = FALSE,
|
|
suffix = "", thresh = 0.05){
|
|
|
|
thresh <- as.numeric(thresh)
|
|
num_sig_in_cond <- c()
|
|
|
|
if(typeof(conditions) == "logical"){
|
|
cond <- get_colnames(m)
|
|
} else {
|
|
cond <- conditions
|
|
}
|
|
|
|
SigHist <- get_n_significant_conditions(m = m, thresh = thresh,
|
|
conditions = cond) %>%
|
|
enframe(name = "Marker") %>%
|
|
rename(Number_of_Conditions = .data$value) %>%
|
|
group_by(.data$Number_of_Conditions) %>%
|
|
summarise(Significant_SNPs = n()) %>%
|
|
filter(.data$Number_of_Conditions != 0)
|
|
|
|
vis <- ggplot(SigHist, aes(x = .data$Number_of_Conditions,
|
|
y = .data$Significant_SNPs)) +
|
|
geom_line() +
|
|
geom_point() +
|
|
geom_hline(yintercept = 0, lty = 2) +
|
|
xlab(label = "Number of Conditions") +
|
|
ylab(label = "Number of Significant SNPs")+
|
|
theme_classic() +
|
|
theme(axis.text.x = element_blank(),
|
|
axis.ticks.x = element_blank(),
|
|
panel.background = element_rect(fill=NA),
|
|
legend.position = "none",
|
|
axis.title = element_text(size = 10),
|
|
axis.text = element_text(size = 10),
|
|
axis.line.x = element_line(size = 0.35, colour = 'grey50'),
|
|
axis.line.y = element_line(size = 0.35, colour = 'grey50'),
|
|
axis.ticks = element_line(size = 0.25, colour = 'grey50'),
|
|
legend.justification = c(1, 0.75),
|
|
legend.key.size = unit(0.35, 'cm'),
|
|
legend.title = element_blank(),
|
|
legend.text = element_text(size = 9),
|
|
legend.text.align = 0, legend.background = element_blank(),
|
|
plot.subtitle = element_text(size = 10, vjust = 0),
|
|
strip.background = element_blank(),
|
|
strip.text = element_text(hjust = 0.5, size = 10 ,vjust = 0),
|
|
strip.placement = 'outside', panel.spacing.x = unit(-0.1, 'cm'))
|
|
|
|
if(saveoutput == TRUE){
|
|
if(!(str_sub(suffix, end = 1) %in% c("", "_"))){
|
|
suffix <- paste0("_", suffix)
|
|
}
|
|
if(str_sub(suffix, end = 1) %in% c("")){
|
|
suffix <- paste0("_", get_date_filename())
|
|
}
|
|
ggsave(paste0("SNPs_significant_by_number_of_conditions", suffix, ".png"),
|
|
width = 5, height = 3, units = "in", dpi = 400)
|
|
}
|
|
|
|
return(list(sighist = SigHist, ggobject = vis))
|
|
}
|