Error checks and plotting speedups for dive_phe2mash function

2021-03-31 18:17:02 -05:00
parent 027323acf6
commit 1edd72d15e
26 changed files with 779 additions and 63 deletions
--- a/5
+++ b/5
@@ -15,10 +15,13 @@ Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.1.1
 Imports: 
    ashr,
    bigassertr,
    bigsnpr,
    bigstatsr,
    cluster,
    cowplot,
    dplyr,
    GGally,
    ggplot2,
    mashr,
    magrittr,
@@ -26,6 +29,8 @@ Imports:
    purrr,
    readr,
    rlang (>= 0.1.2),
    rlist,
    stringr,
    tibble,
    tidyr,
    tidyselect
--- a/25
+++ b/25
@@ -11,25 +11,44 @@ export(dive_phe2mash)
 export(enquo)
 export(enquos)
 export(expr)
 export(get_GxE)
 export(get_U_by_mass)
 export(get_lambdagc)
 export(get_pairwise_sharing)
 export(get_qqplot)
 export(get_significant_results)
 export(mash_plot_Ulist)
 export(mash_plot_covar)
 export(mash_plot_manhattan_by_condition)
 export(mash_plot_marker_effect)
 export(mash_plot_pairwise_sharing)
 export(mash_plot_sig_by_condition)
 export(sym)
 export(syms)
 import(bigsnpr)
 import(bigstatsr)
 import(ggplot2)
 import(mashr)
 importFrom(GGally,ggcorr)
 importFrom(ashr,get_fitted_g)
 importFrom(ashr,get_lfsr)
 importFrom(ashr,get_pm)
 importFrom(ashr,get_psd)
 importFrom(bigassertr,printf)
 importFrom(bigsnpr,snp_autoSVD)
 importFrom(cluster,daisy)
 importFrom(cowplot,save_plot)
 importFrom(dplyr,arrange)
 importFrom(dplyr,between)
 importFrom(dplyr,case_when)
 importFrom(dplyr,desc)
 importFrom(dplyr,filter)
 importFrom(dplyr,full_join)
 importFrom(dplyr,group_by)
 importFrom(dplyr,left_join)
 importFrom(dplyr,mutate)
 importFrom(dplyr,mutate_if)
 importFrom(dplyr,n)
 importFrom(dplyr,rename)
 importFrom(dplyr,select)
 importFrom(dplyr,slice)
@@ -52,11 +71,15 @@ importFrom(rlang,enquos)
 importFrom(rlang,expr)
 importFrom(rlang,sym)
 importFrom(rlang,syms)
 importFrom(rlist,list.append)
 importFrom(stats,hclust)
 importFrom(stats,median)
 importFrom(stats,ppoints)
 importFrom(stats,predict)
 importFrom(stats,qbeta)
 importFrom(stats,uniroot)
 importFrom(stringr,str_replace)
 importFrom(stringr,str_replace_all)
 importFrom(tibble,add_column)
 importFrom(tibble,add_row)
 importFrom(tibble,as_tibble)
@@ -64,6 +87,8 @@ importFrom(tibble,enframe)
 importFrom(tibble,rownames_to_column)
 importFrom(tibble,tibble)
 importFrom(tidyr,gather)
 importFrom(tidyr,pivot_longer)
 importFrom(tidyr,replace_na)
 importFrom(tidyr,separate)
 importFrom(tidyselect,all_of)
 importFrom(utils,tail)
--- a/R/wrapper.R
+++ b/R/wrapper.R
@@ -42,6 +42,7 @@
 #' @param U.hyp Other covariance matrices for mash. Specify these as a list. These
 #'     matrices must have dimensions that match the number of phenotypes where
 #'     univariate GWAS ran successfully.
 #' @param verbose Output some information on the iterations? Default is `TRUE`.
 #'
 #' @return A mash object made up of all phenotypes where univariate GWAS ran
 #'     successfully.
@@ -56,6 +57,7 @@
 #' @importFrom tidyr replace_na
 #' @importFrom matrixStats colMaxs rowMaxs
 #' @importFrom stats predict
 #' @importFrom bigassertr printf
 #'
 #' @export
 dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
@@ -64,7 +66,7 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
                          thr.m = c("sum", "max"), num.strong = 1000,
                          num.random = NA,
                          scale.phe = TRUE, roll.size = 50, U.ed = NA,
-                          U.hyp = NA){
+                          U.hyp = NA, verbose = TRUE){
  # 1. Stop if not functions. ----
  if (attr(snp, "class") != "bigSNP") {
    stop("snp needs to be a bigSNP object, produced by the bigsnpr package.")
@@ -80,6 +82,9 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
  } else {
    type <- rep(type, ncol(df) - 1)
  }
  if (!dir.exists(outputdir)) {
    dir.create(outputdir)
  }
  ## 1a. Generate useful values ----
  G <- snp$genotypes
@@ -94,12 +99,13 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
  bonferroni <- -log10(0.05/length(snp$map$physical.pos))
  markers <- tibble(CHR = snp$map$chromosome, POS = snp$map$physical.pos,
                    marker.ID = snp$map$marker.ID) %>%
-    mutate(CHRN = as.numeric(as.factor(.data$CHR)))
+    mutate(CHRN = as.numeric(as.factor(.data$CHR)),
           CHR = as.factor(.data$CHR))
  # 2. Pop Structure Correction  ----
  if (is.null(svd)) {
-    message(paste0("Covariance matrix (svd) was not supplied - this will be",
+    printf2(verbose = verbose, "\nCovariance matrix (svd) was not supplied - ")
-                   " generated using snp_autoSVD()."))
+    printf2(verbose = verbose, "\nthis will be generated using snp_autoSVD()")
    svd <- snp_autoSVD(G = G, infos.chr = markers$CHRN, infos.pos = markers$POS,
                       k = 10, thr.r2 = thr.r2, roll.size = roll.size)
    } else {
@@ -107,6 +113,7 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
      }
  pc_max <- ncol(svd$u)
  gwas_ok <- c()
  first_gwas_ok <- FALSE
  for (i in 2:ncol(df)) {
    df1 <- df %>%
@@ -127,8 +134,10 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
    gwas_ok[i-1] <- check_gwas(df1 = df1, phename = phename, type = type[i-1],
                               nPhe = nPhe, minphe = min.phe, nLev = nLev)
    # Find best # PCs to correct for population structure for each phenotype.
    if(gwas_ok[i-1]){
      lambdagc_df <- div_lambda_GC(df = df1, type = type[i-1], snp = snp,
                                   svd = svd, npcs = c(0:pc_max))
      PC_df <- get_best_PC_df(lambdagc_df)
@@ -145,29 +154,22 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
                        nsnp = nSNP, npcs = PC_df$NumPCs, nphe = nPhe,
                        nlev = nLev, lambda_GC = PC_df$lambda_GC,
                        bonferroni = bonferroni)
-    # plot Manhattan and QQ if save.plots == TRUE
+
-    if(save.plots == TRUE){
+    # plot  QQ if save.plots == TRUE
    if (save.plots == TRUE) {
      qqplot <- get_qqplot(ps = gwas$pvalue, lambdaGC = TRUE)
-      manhattan <- get_manhattan(log10p = gwas$log10p, snp = snp,
+      }
                                 thresh = bonferroni)  # could round these too
      plotname <- paste0(gwas_data$phe, "_", gwas_data$type, "_model_",
                         gwas_data$nphe, "g_", gwas_data$nsnp, "_SNPs_",
                         gwas_data$npcs, "_PCs.png")
      save_plot(filename = file.path(outputdir, paste0("QQplot_", plotname)),
                plot = qqplot, base_asp = 1, base_height = 4)
      save_plot(filename = file.path(outputdir, paste0("Manhattan_", plotname)),
                plot = manhattan, base_asp = 2.1, base_height = 3.5)
    }
    # save gwas outputs together in a fbm
    gwas <- gwas %>%
      select(.data[["estim"]], .data[["std.err"]], .data[["log10p"]])
-    if(i == 2){     # save .bk and .rds file the first time through the loop.
+    if(!first_gwas_ok){     # save .bk and .rds file the first time through the loop.
      if (!grepl("_$", suffix) & suffix != ""){
        suffix <- paste0("_", suffix)
      }
-      fbm.name <- paste0(outputdir, "gwas_effects", suffix)
+      first_gwas_ok <- TRUE
      fbm.name <- file.path(outputdir, paste0("gwas_effects", suffix))
      colnames_fbm <- c(paste0(phename, "_Effect"), paste0(phename, "_SE"),
                       paste0(phename, "_log10p"))
@@ -179,18 +181,43 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
      colnames_fbm <- c(colnames_fbm, paste0(phename, "_Effect"),
                        paste0(phename, "_SE"), paste0(phename, "_log10p"))
      gwas2$add_columns(ncol_add = 3)
-      gwas2[,c(i*3-5, i*3-4, i*3-3)] <- gwas
+      gwas2[, c(sum(gwas_ok)*3 - 2, sum(gwas_ok)*3 - 1,
                sum(gwas_ok)*3)] <- gwas
      gwas2$save()
-      gwas_metadata <- add_row(gwas_metadata, phe = phename, type = type[i-1],
+      gwas_metadata <- add_row(gwas_metadata, phe = phename, type = type[i - 1],
                           nsnp = nSNP, npcs = PC_df$NumPCs, nphe = nPhe,
                           nlev = nLev, lambda_GC = PC_df$lambda_GC,
                           bonferroni = bonferroni)
    }
    # plot Manhattan and QQ if save.plots == TRUE
    if (save.plots == TRUE) {
      # set aspect ratio based on number of SNPs in snp file
      asp <- log10(snp$genotypes$ncol)/2
      if(asp < 1.1){
        asp <- 1.1
      }
      manhattan <- get_manhattan(X = gwas2, ind = sum(gwas_ok)*3, snp = snp,
                                 thresh = bonferroni)
      plotname <- paste0(gwas_data$phe, "_", gwas_data$type, "_model_",
                         gwas_data$nphe, "g_", gwas_data$nsnp, "_SNPs_",
                         gwas_data$npcs, "_PCs.png")
      save_plot(filename = file.path(outputdir, paste0("QQplot_", plotname)),
                plot = qqplot, base_asp = 1, base_height = 4)
      save_plot(filename = file.path(outputdir, paste0("Manhattan_", plotname)),
                plot = manhattan, base_asp = asp, base_height = 3.75)
    }
  rm(gwas)
-  message(paste0("Finished phenotype ", i-1, ": ", names(df)[i]))
+  printf2(verbose = verbose, "\nFinished GWAS on phenotype %s. ",
          names(df)[i])
    } else {
      printf2(verbose = verbose, "\nSkipping GWAS on phenotype %s. ",
              names(df)[i])
    }
  }
  printf2(verbose = verbose, "\nNow preparing gwas effects for use in mash.\n")
  # 4. mash input ----
      ## prioritize effects with max(log10p) or max(sum(log10p))
      ## make a random set of relatively unlinked SNPs
@@ -198,7 +225,7 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
  ind_se <- (1:sum(gwas_ok))*3 - 1
  ind_p <- (1:sum(gwas_ok))*3
-  if(thr.m == "sum"){
+  if (thr.m == "sum") {
  thr_log10p <- big_apply(gwas2,
                         a.FUN = function(X, ind) rowSums(X[, ind]),
                         ind = ind_p,
@@ -211,21 +238,21 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
  }
  gwas2$add_columns(ncol_add = 1)
  colnames_fbm <- c(colnames_fbm, paste0(thr.m, "_thr_log10p"))
-  gwas2[,(sum(gwas_ok)*3+1)] <- thr_log10p
+  gwas2[,(sum(gwas_ok)*3 + 1)] <- thr_log10p
  gwas2$save()
    ## replace NA or Nan values
  # Replace SE with 1's, estimates and p values with 0's.
  replace_na_1 <- function(X, ind) replace_na(X[, ind], 1)
  replace_na_0 <- function(X, ind) replace_na(X[, ind], 0)
-  gwas2[,ind_se] <- big_apply(gwas2, a.FUN = replace_na_1, ind = ind_se,
+  gwas2[, ind_se] <- big_apply(gwas2, a.FUN = replace_na_1, ind = ind_se,
                              a.combine = 'plus')
-  gwas2[,ind_estim] <- big_apply(gwas2, a.FUN = replace_na_0, ind = ind_estim,
+  gwas2[, ind_estim] <- big_apply(gwas2, a.FUN = replace_na_0, ind = ind_estim,
                                 a.combine = 'plus')
-  gwas2[,ind_p] <- big_apply(gwas2, a.FUN = replace_na_0, ind = ind_p,
+  gwas2[, ind_p] <- big_apply(gwas2, a.FUN = replace_na_0, ind = ind_p,
                                 a.combine = 'plus')
-  gwas2[,(sum(gwas_ok)*3+1)] <- big_apply(gwas2, a.FUN = replace_na_0,
+  gwas2[, (sum(gwas_ok)*3+1)] <- big_apply(gwas2, a.FUN = replace_na_0,
-                                          ind = (sum(gwas_ok)*3+1),
+                                          ind = (sum(gwas_ok)*3 + 1),
                                          a.combine = 'plus')
  gwas2$save()
@@ -301,31 +328,26 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
  # 5. mash ----
  data_r <- mashr::mash_set_data(Bhat_random, Shat_random)
-  message(paste0("Estimating the correlation structure in the null tests from ",
+  printf2(verbose = verbose, "\nEstimating correlation structure in the null tests from a random sample of clumped data.")
                 "the random data.
                 (not the strong data because it will not necessarily contain
                 any null tests)."))
  Vhat <- mashr::estimate_null_correlation_simple(data = data_r)
  message(paste0("Setting up the main data objects with this correlation ",
                 "structure in place."))
  data_strong <- mashr::mash_set_data(Bhat_strong, Shat_strong, V = Vhat)
  data_random <- mashr::mash_set_data(Bhat_random, Shat_random, V = Vhat)
  data_full <- mashr::mash_set_data(Bhat_full, Shat_full, V = Vhat)
  U_c <- mashr::cov_canonical(data_random)
-  if (!is.na(U.ed[1])) {
+  if (is.na(U.ed[1])) {
-  message(paste0("Now estimating data-driven covariances using the strong",
+    printf2(verbose = verbose, "\nNow estimating data-driven covariances using
-                 " tests.
+    the strong tests. NB: This step may take some time to complete.\n")
-                 NB: This step may take some time to complete."))
+    if (length(ind_p) < 6) {
-  if (length(ind_p) < 6) {
+      cov_npc <- ind_p - 1
-    cov_npc <- ind_p - 1
+    } else {
-  } else {
+      cov_npc <- 5
-    cov_npc <- 5
+    }
-  }
+    U_pca = mashr::cov_pca(data_strong, npc = cov_npc)
-  U_pca = mashr::cov_pca(data_strong, npc = cov_npc)
+    U_ed = mashr::cov_ed(data_strong, U_pca)
-  U_ed = mashr::cov_ed(data_strong, U_pca)
+    saveRDS(U_ed, file = file.path(outputdir, paste0("Mash_U_ed", suffix,
-  saveRDS(U_ed, file = paste0(outputdir, "Mash_U_ed", suffix, ".rds"))
+                                                     ".rds")))
  } else if (typeof(U.ed) == "list") {
    U_ed <- U.ed
  } else if (typeof(U.ed) == "character") {
@@ -337,12 +359,15 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
  if (typeof(U.hyp) == "list") {
    m = mashr::mash(data_random, Ulist = c(U_ed, U_c, U.hyp), outputlevel = 1)
-  } else {
+    } else if (typeof(U.hyp) == "character") {
    U_hyp <- readRDS(file = U.hyp)
    m = mashr::mash(data_random, Ulist = c(U_ed, U_c, U_hyp), outputlevel = 1)
    } else {
    m = mashr::mash(data_random, Ulist = c(U_ed, U_c), outputlevel = 1)
    printf2(verbose = verbose, "\nNo user-specified covariance matrices were included in the mash fit.")
  }
-  message(paste0("Compute posterior matrices for all effects",
+  printf2(verbose = verbose, "\nComputing posterior weights for all effects
-                 " using the mash fit from the
+  using the mash fit from the random tests.")
               random tests."))
  m2 = mashr::mash(data_full, g = ashr::get_fitted_g(m), fixg = TRUE)
  return(m2)
@@ -429,6 +454,9 @@ div_gwas <- function(df, snp, type, svd, npcs){
  return(gwaspc)
 }
 #' Verbose?
 #' @importFrom bigassertr printf
 printf2 <- function(verbose, ...) if (verbose) { printf(...) }
 #' Create a quantile-quantile plot with ggplot2.
 #'
@@ -544,17 +572,27 @@ round_xy <- function(x, y, cl = NA, cu = NA, roundby = 0.001){
  }
 }
-get_manhattan <- function(log10p, snp, thresh){
+get_manhattan <- function(X, ind, snp, thresh){
  roundFBM <- function(X, ind, at) ceiling(X[, ind] / at) * at
  observed <- big_apply(X, ind = ind, a.FUN = roundFBM, at = 0.01,
                        a.combine = 'plus')
  plot_data <- tibble(CHR = snp$map$chromosome, POS = snp$map$physical.pos,
-                    marker.ID = snp$map$marker.ID, log10p = log10p) %>%
+                    marker.ID = snp$map$marker.ID, observed = observed)
-    mutate(observed = round2(.data$log10p, at = 0.001)) %>%
+
  if (length(unique(snp$map$physical.pos)) >= 500000) {
    plot_data <- plot_data %>%
      mutate(POS = round2(.data$POS, at = 250000))
  }
  plot_data <- plot_data %>%
    group_by(.data$CHR, .data$POS, .data$observed) %>%
-    slice(1)
+    slice(1) %>%
    mutate(CHR = as.factor(.data$CHR))
  nchr <- length(unique(plot_data$CHR))
  p1 <- plot_data %>%
-    ggplot(aes(x = .data$POS, y = .data$log10p)) +
+    ggplot(aes(x = .data$POS, y = .data$observed)) +
    geom_point(aes(color = .data$CHR, fill = .data$CHR)) +
    geom_hline(yintercept = thresh, color = "black", linetype = 2,
               size = 1) +
@@ -583,7 +621,7 @@ get_manhattan <- function(log10p, snp, thresh){
          strip.text = element_text(hjust = 0.5, size = 10 ,vjust = 0),
          strip.placement = 'outside', panel.spacing.x = unit(-0.1, 'cm')) +
    labs(x = "Chromosome", y = "-log10(p value)") +
-    scale_x_continuous(expand = c(0.2, 0.2))
+    scale_x_continuous(expand = c(0.15, 0.15))
  return(p1)
 }
@@ -737,10 +775,9 @@ get_lambdagc <- function(ps, tol = 1e-8){
 }
-#' Return best number of PCs in terms of lambda_GC for Panicum virgatum.
+#' Return best number of PCs in terms of lambda_GC
 #' Return best number of PCs in terms of lambda_GC for the CDBN.
 #'
-#' @description Given a dataframe created using pvdiv_lambda_GC, this function
+#' @description Given a dataframe created using div_lambda_GC, this function
 #'     returns the first lambda_GC less than 1.05, or the smallest lambda_GC,
 #'     for each column in the dataframe.
 #'
@@ -812,3 +849,35 @@ check_gwas <- function(df1, phename, type, nPhe, minphe, nLev){
  }
  return(gwas_ok)
 }
 ## @title Basic sanity check for covariance matrices
 ## @param X input matrix
 check_covmat_basics = function(x) {
  label = substitute(x)
  if (!is.matrix(x))
    labelled_stop(label, "is not a matrix")
  if (!is.numeric(x))
    labelled_stop(label, "is not a numeric matrix")
  if (any(is.na(x)))
    labelled_stop(label, "cannot contain NA values")
  if (any(is.infinite(x)))
    labelled_stop(label, "cannot contain Inf values")
  if (any(is.nan(x)))
    labelled_stop(label, "cannot contain NaN values")
  if (nrow(x) != ncol(x))
    labelled_stop(label, "is not a square matrix")
  if (!isSymmetric(x, check.attributes = FALSE))
    labelled_stop(label, "is not a symmetric matrix")
  return(TRUE)
 }
 ## @title check matrix for positive definitness
 ## @param X input matrix
 check_positive_definite = function(x) {
  check_covmat_basics(x)
  tryCatch(chol(x),
           error = function(e) labelled_stop(substitute(x),
                                             "must be positive definite"))
  return(TRUE)
 }
--- a/man/dive_phe2mash.Rd
+++ b/man/dive_phe2mash.Rd
@@ -20,7 +20,8 @@ dive_phe2mash(
  scale.phe = TRUE,
  roll.size = 50,
  U.ed = NA,
-  U.hyp = NA
+  U.hyp = NA,
  verbose = TRUE
 )
 }
 \arguments{
@@ -73,6 +74,8 @@ generating these once and reusing them for multiple mash runs can save time.}
 \item{U.hyp}{Other covariance matrices for mash. Specify these as a list. These
 matrices must have dimensions that match the number of phenotypes where
 univariate GWAS ran successfully.}
 \item{verbose}{Output some information on the iterations? Default is \code{TRUE}.}
 }
 \value{
 A mash object made up of all phenotypes where univariate GWAS ran
--- a/man/expand_cov.Rd
+++ b/man/expand_cov.Rd
@@ -0,0 +1,29 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mash-evaluation.R
 \name{expand_cov}
 \alias{expand_cov}
 \title{Create expanded list of covariance matrices expanded by
 grid, Sigma_{lk} = omega_l U_k}
 \usage{
 expand_cov(Ulist, grid, usepointmass = TRUE)
 }
 \arguments{
 \item{Ulist}{a list of covarance matrices}
 \item{grid}{a grid of scalar values by which the covariance
 matrices are to be sc}
 \item{usepointmass}{if TRUE adds a point mass at 0 (null component)
 to the list}
 }
 \value{
 This takes the covariance matrices in Ulist and multiplies
 them by the grid values If usepointmass is TRUE then it adds a null
 component.
 }
 \description{
 This is an internal (non-exported) function. This help
 page provides additional documentation mainly intended for
 developers and expert users.
 }
 \keyword{internal}
--- a/man/get_GxE.Rd
+++ b/man/get_GxE.Rd
@@ -0,0 +1,39 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mash-evaluation.R
 \name{get_GxE}
 \alias{get_GxE}
 \title{Get data frames of types of GxE from a mash object}
 \usage{
 get_GxE(m, factor = 0.4, thresh = 0.05)
 }
 \arguments{
 \item{m}{An object of type mash}
 \item{factor}{a number between 0 and 1. The factor within which effects are
 considered to be shared.}
 \item{thresh}{Numeric. The threshold for including an effect in the assessment}
 }
 \value{
 A list containing eight data frames. Those with names that start
 "S_" contain significant effects of different types between pairs of
 named rows and columns. S_all_pairwise contains all significant effects;
 NS_pairwise contains all non-significant effects. S_CN contains effects
 significant in only one condition, and effects with a significantly
 different magnitude (differential sensitivity). This dataframe is not
 conservative using the local false sign rate test - we can't determine
 the sign of one of the effects for effects significant in only one
 condition - so it's not recommended to use this, but included. S_2_no
 contains effects significant in both conditions that do not differ
 significantly in magnitude. These effects do not have GxE. S_AP contains
 effects significant in both conditions that differ in their sign - and
 have antagonistic pleiotropy. S_DS contains effects significant in both
 conditions that differ in the magnitude of their effect, but not their
 sign - differentially sensitive alleles. S_1_row and S_1_col contain
 effects that are significant in just one of the two conditions - the row
 or the column, respectively.
 }
 \description{
 Performs set operations to determine pairwise GxE for effects
 from a mash object.
 }
--- a/man/get_U_by_mass.Rd
+++ b/man/get_U_by_mass.Rd
@@ -0,0 +1,18 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mash-evaluation.R
 \name{get_U_by_mass}
 \alias{get_U_by_mass}
 \title{Get the positions of objects in a mash object Ulist that are above
 some mass threshold.}
 \usage{
 get_U_by_mass(m, thresh = 0.05)
 }
 \arguments{
 \item{m}{An object of type mash}
 \item{thresh}{Numeric. The mass threshold for including a covariance matrix}
 }
 \description{
 Get the positions of objects in a mash object Ulist that are
 above some mass threshold.
 }
--- a/man/get_best_PC_df.Rd
+++ b/man/get_best_PC_df.Rd
@@ -2,8 +2,7 @@
 % Please edit documentation in R/wrapper.R
 \name{get_best_PC_df}
 \alias{get_best_PC_df}
-\title{Return best number of PCs in terms of lambda_GC for Panicum virgatum.
+\title{Return best number of PCs in terms of lambda_GC}
 Return best number of PCs in terms of lambda_GC for the CDBN.}
 \usage{
 get_best_PC_df(df)
 }
@@ -16,7 +15,7 @@ A dataframe containing the best lambda_GC value and number of PCs
 for each phenotype in the data frame.
 }
 \description{
-Given a dataframe created using pvdiv_lambda_GC, this function
+Given a dataframe created using div_lambda_GC, this function
 returns the first lambda_GC less than 1.05, or the smallest lambda_GC,
 for each column in the dataframe.
 }
--- a/man/get_colnames.Rd
+++ b/man/get_colnames.Rd
@@ -0,0 +1,24 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mash-evaluation.R
 \name{get_colnames}
 \alias{get_colnames}
 \title{Get column names from a mash object}
 \usage{
 get_colnames(m)
 }
 \arguments{
 \item{m}{An object of type mash}
 }
 \value{
 A vector of phenotype names
 }
 \description{
 This function extracts the column names from the local false
 sign rate table of a mash object's results. This can tell you the condition
 names or phenotype names used in the mash object. That can be useful for
 looking at a subset of these columns, say.
 }
 \examples{
    \dontrun{get_colnames(m = mash_obj)}
 }
--- a/man/get_date_filename.Rd
+++ b/man/get_date_filename.Rd
@@ -0,0 +1,17 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mash-evaluation.R
 \name{get_date_filename}
 \alias{get_date_filename}
 \title{Get current date-time in a filename-appropriate format.}
 \usage{
 get_date_filename()
 }
 \value{
 A string containing the current date-time with spaces and colons
 replaced with underscores and periods, respectively.
 }
 \description{
 Converts the current \code{Sys.time()} system time to a format
 that is acceptable to include in a filename. Changes punctuation that
 won't work in a filename.
 }
--- a/man/get_estimated_pi.Rd
+++ b/man/get_estimated_pi.Rd
@@ -0,0 +1,38 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mash-evaluation.R
 \name{get_estimated_pi}
 \alias{get_estimated_pi}
 \title{Return the estimated mixture proportions. Use get_estimated_pi to
 extract the estimates of the mixture proportions for different types of
 covariance matrix. This tells you which covariance matrices have most of
 the mass.}
 \usage{
 get_estimated_pi(m, dimension = c("cov", "grid", "all"))
 }
 \arguments{
 \item{m}{the mash result}
 \item{dimension}{indicates whether you want the mixture proportions for the
 covariances, grid, or all}
 }
 \value{
 a named vector containing the estimated mixture proportions.
 }
 \description{
 Return the estimated mixture proportions. Use get_estimated_pi to
 extract the estimates of the mixture proportions for different types of
 covariance matrix. This tells you which covariance matrices have most of
 the mass.
 }
 \details{
 If the fit was done with \code{usepointmass=TRUE} then the first
 element of the returned vector will correspond to the null, and the
 remaining elements to the non-null covariance matrices. Suppose the fit
 was done with $K$ covariances and a grid of length $L$. If
 \code{dimension=cov} then the returned vector will be of length $K$
 (or $K+1$ if \code{usepointmass=TRUE}).  If \code{dimension=grid} then
 the returned vector will be of length $L$ (or $L+1$).  If
 \code{dimension=all} then the returned vector will be of length $LK$ (or
 $LK+1$). The names of the vector will be informative for which
 combination each element corresponds to.
 }
--- a/man/get_log10bf.Rd
+++ b/man/get_log10bf.Rd
@@ -0,0 +1,21 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mash-evaluation.R
 \name{get_log10bf}
 \alias{get_log10bf}
 \title{Return the Bayes Factor for each effect}
 \usage{
 get_log10bf(m)
 }
 \arguments{
 \item{m}{the mash result (from joint or 1by1 analysis); must have been
 computed using usepointmass = TRUE}
 }
 \value{
 if m was fitted using usepointmass=TRUE then returns a vector of
 the log10(bf) values for each effect. That is, the jth element
 lbf_j is log10(Pr(Bj | g = ghat-nonnull)/Pr(Bj | g = 0)) where gha
 t-nonnull is the non-null part of ghat.  Otherwise returns NULL.
 }
 \description{
 Return the Bayes Factor for each effect
 }
--- a/man/get_marker_df.Rd
+++ b/man/get_marker_df.Rd
@@ -0,0 +1,18 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mash-evaluation.R
 \name{get_marker_df}
 \alias{get_marker_df}
 \title{Get mash marker_df}
 \usage{
 get_marker_df(m, snp)
 }
 \arguments{
 \item{m}{An object of type mash}
 \item{snp}{A bigSNP object, produced by the bigsnpr package. Here, the WAMI
 SNP information.}
 }
 \description{
 Pulls SNP markers information in the mash object from a bigsnp
 object.
 }
--- a/man/get_n_significant_conditions.Rd
+++ b/man/get_n_significant_conditions.Rd
@@ -0,0 +1,28 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mash-evaluation.R
 \name{get_n_significant_conditions}
 \alias{get_n_significant_conditions}
 \title{Count number of conditions each effect is significant in}
 \usage{
 get_n_significant_conditions(
  m,
  thresh = 0.05,
  conditions = NULL,
  sig_fn = get_lfsr
 )
 }
 \arguments{
 \item{m}{the mash result (from joint or 1by1 analysis)}
 \item{thresh}{indicates the threshold below which to call signals significant}
 \item{conditions}{which conditions to include in check (default to all)}
 \item{sig_fn}{the significance function used to extract significance from mash object; eg could be ashr::get_lfsr or ashr::get_lfdr}
 }
 \value{
 a vector containing the number of significant conditions
 }
 \description{
 Count number of conditions each effect is significant in
 }
--- a/man/get_ncond.Rd
+++ b/man/get_ncond.Rd
@@ -0,0 +1,14 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mash-evaluation.R
 \name{get_ncond}
 \alias{get_ncond}
 \title{Get number of conditions}
 \usage{
 get_ncond(m)
 }
 \arguments{
 \item{m}{The mash result}
 }
 \description{
 Get number of conditions
 }
--- a/man/get_pairwise_sharing.Rd
+++ b/man/get_pairwise_sharing.Rd
@@ -0,0 +1,40 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mash-evaluation.R
 \name{get_pairwise_sharing}
 \alias{get_pairwise_sharing}
 \title{Compute the proportion of (significant) signals shared by magnitude in each pair of conditions, based on the poterior mean}
 \usage{
 get_pairwise_sharing(m, factor = 0.5, lfsr_thresh = 0.05, FUN = identity)
 }
 \arguments{
 \item{m}{the mash fit}
 \item{factor}{a number between 0 and 1 - the factor within which effects are
 considered to be shared.}
 \item{lfsr_thresh}{the lfsr threshold for including an effect in the
 assessment}
 \item{FUN}{a function to be applied to the estimated effect sizes before
 assessing sharing. The most obvious choice beside the default
 'FUN=identity' would be 'FUN=abs' if you want to ignore the sign of the
 effects when assesing sharing.}
 }
 \description{
 Compute the proportion of (significant) signals shared by magnitude in each pair of conditions, based on the poterior mean
 }
 \details{
 For each pair of tissues, first identify the effects that are
 significant (by lfsr<lfsr_thresh) in at least one of the two tissues.
 Then compute what fraction of these have an estimated (posterior mean)
 effect size within a factor \code{factor} of one another. The results are
 returned as an R by R matrix.
 }
 \examples{
 \dontrun{
 get_pairwise_sharing(m) # sharing by magnitude (same sign)
 get_pairwise_sharing(m, factor=0) # sharing by sign
 get_pairwise_sharing(m, FUN=abs) # sharing by magnitude when sign is ignored
 }
 }
--- a/man/get_significant_results.Rd
+++ b/man/get_significant_results.Rd
@@ -0,0 +1,42 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mash-evaluation.R
 \name{get_significant_results}
 \alias{get_significant_results}
 \title{From a mash result, get effects that are significant in at least
 one condition.}
 \usage{
 get_significant_results(
  m,
  thresh = 0.05,
  conditions = NULL,
  sig_fn = ashr::get_lfsr
 )
 get_significant_results(
  m,
  thresh = 0.05,
  conditions = NULL,
  sig_fn = ashr::get_lfsr
 )
 }
 \arguments{
 \item{m}{the mash result (from joint or 1by1 analysis)}
 \item{thresh}{indicates the threshold below which to call signals significant}
 \item{conditions}{which conditions to include in check (default to all)}
 \item{sig_fn}{the significance function used to extract significance from mash object; eg could be ashr::get_lfsr or ashr::get_lfdr. (Small values must indicate significant.)}
 }
 \value{
 a vector containing the indices of the significant effects, by
 order of most significant to least
 a vector containing the indices of the significant effects, by order of most significant to least
 }
 \description{
 From a mash result, get effects that are significant in at least
 one condition.
 From a mash result, get effects that are significant in at least one condition
 }
--- a/man/mash_plot_Ulist.Rd
+++ b/man/mash_plot_Ulist.Rd
@@ -0,0 +1,35 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mash-evaluation.R
 \name{mash_plot_Ulist}
 \alias{mash_plot_Ulist}
 \title{ggplot of specific covariance matrix patterns}
 \usage{
 mash_plot_Ulist(
  m,
  range = NA,
  saveoutput = FALSE,
  suffix = "",
  limits = TRUE,
  labels = TRUE
 )
 }
 \arguments{
 \item{m}{An object of type mash}
 \item{range}{Numeric vector. Which covariance matrices should be plotted?}
 \item{saveoutput}{Logical. Should the output be saved to the path?}
 \item{suffix}{Character. Optional. A unique suffix used to save the files,
 instead of the current date & time.}
 \item{limits}{should there be plot limits of -1 and 1? Default is true.}
 }
 \value{
 A list of dataframes used to make the tile plots and the plots
 themselves.
 }
 \description{
 Creates a tile plot using ggplot of the covariance matrices
 specified in the mash model.
 }
--- a/man/mash_plot_covar.Rd
+++ b/man/mash_plot_covar.Rd
@@ -0,0 +1,27 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mash-evaluation.R
 \name{mash_plot_covar}
 \alias{mash_plot_covar}
 \title{ggplot of covariance matrix masses}
 \usage{
 mash_plot_covar(m, saveoutput = FALSE, suffix = "")
 }
 \arguments{
 \item{m}{An object of type mash}
 \item{saveoutput}{Logical. Should the output be saved to the path?}
 \item{suffix}{Character. Optional. A unique suffix used to save the files,
 instead of the current date & time.}
 }
 \description{
 Creates a bar plot using ggplot of the masses that are on each
 covariance matrix specified in the mash model.
 }
 \note{
 This plot can be useful for seeing the overall patterns of effects in
 the data used in mash. Non-significant effects will add mass to the
 "no_effects" covariance matrix, while significant effects will add mass
 to one of the other covariance matrices. You can use mash_plot_Ulist()
 to plot the covariance matrix patterns themselves.
 }
--- a/man/mash_plot_manhattan_by_condition.Rd
+++ b/man/mash_plot_manhattan_by_condition.Rd
@@ -0,0 +1,47 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mash-evaluation.R
 \name{mash_plot_manhattan_by_condition}
 \alias{mash_plot_manhattan_by_condition}
 \title{Manhattan plot in ggplot colored by significant conditions}
 \usage{
 mash_plot_manhattan_by_condition(
  m,
  snp,
  cond = NA,
  saveoutput = FALSE,
  suffix = "",
  thresh = 0.05
 )
 }
 \arguments{
 \item{m}{A mash object (outputted by mash).}
 \item{snp}{A bigSNP object, produced by the bigsnpr package. Here,
 the WAMI SNP information.}
 \item{cond}{A vector of phenotypes. Defaults to the names of each
 column in the mash object.}
 \item{saveoutput}{Logical. Should the output be saved to the path?}
 \item{suffix}{Character. Optional. A unique suffix used to save the files,
 instead of the current date & time.}
 \item{thresh}{Numeric. The threshold used for the local false sign rate to
 call significance in a condition.}
 }
 \value{
 A \code{tbl_df()} of the data used to make the Manhattan plot, and a
 ggplot object containing the Manhattan.
 }
 \description{
 Takes a mash object and, for some vector of phenotypes, returns
 a Manhattan plot ggplot object (and its dataframe). Each SNP in the plot
 is colored by the number of phenotypes it is significant for. Even and
 odd chromosomes have different shapes for their SNPs, so that
 chromosome identity can be determined.
 }
 \examples{
 \dontrun{manhattan_out <- mash_ggman_by_condition(m = m, saveoutput = TRUE)}
 }
--- a/man/mash_plot_marker_effect.Rd
+++ b/man/mash_plot_marker_effect.Rd
@@ -0,0 +1,44 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mash-evaluation.R
 \name{mash_plot_marker_effect}
 \alias{mash_plot_marker_effect}
 \title{ggplot of single mash effect}
 \usage{
 mash_plot_marker_effect(
  m,
  snp = snp,
  n = NA,
  i = NA,
  marker = TRUE,
  saveoutput = FALSE,
  suffix = ""
 )
 }
 \arguments{
 \item{m}{An object of type mash}
 \item{snp}{A bigSNP object, produced by the bigsnpr package. Here,
 the WAMI SNP information.}
 \item{n}{Optional. Integer or integer vector. The result number to plot, in
 order of significance. 1 would be the top result, for example. Find
 these with \code{\link{get_significant_results}}.}
 \item{i}{Optional. Integer or integer vector. The result number to plot, in
 the order of the mash object. 1 would be the first marker in the mash
 object, for example. Find these with \code{\link{get_marker_df}}.}
 \item{marker}{Optional. Print the marker name on the plot?}
 \item{saveoutput}{Logical. Should the output be saved to the path?}
 \item{suffix}{Character. Optional. A unique suffix used to save the files,
 instead of the current date & time.}
 }
 \description{
 Creates a plot with point estimates and standard errors for
 effects of a single SNP in multiple conditions.
 }
 \note{
 Specify only one of n or i.
 }
--- a/man/mash_plot_pairwise_sharing.Rd
+++ b/man/mash_plot_pairwise_sharing.Rd
@@ -0,0 +1,45 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mash-evaluation.R
 \name{mash_plot_pairwise_sharing}
 \alias{mash_plot_pairwise_sharing}
 \title{Create a ggplot of pairwise sharing of mash effects}
 \usage{
 mash_plot_pairwise_sharing(
  m = NULL,
  effectRDS = NULL,
  corrmatrix = NULL,
  reorder = TRUE,
  saveoutput = FALSE,
  filename = NA,
  suffix = "",
  ...
 )
 }
 \arguments{
 \item{m}{An object of type mash}
 \item{effectRDS}{An RDS containing a correlation matrix.}
 \item{corrmatrix}{A correlation matrix}
 \item{reorder}{Logical. Should the columns be reordered by similarity?}
 \item{saveoutput}{Logical. Should the output be saved to the path?}
 \item{filename}{Character string with an output filename. Optional.}
 \item{suffix}{Character. Optional. A unique suffix used to save the files,
 instead of the current date & time.}
 \item{...}{Other arguments to \code{\link{get_pairwise_sharing}} or
 \code{\link{ggcorr}}.}
 }
 \value{
 A list containing a dataframe containing the correlations and a
 ggplot2 object containing the correlation plot.
 }
 \description{
 Given a correlation matrix, an RDS with a correlation matrix, or
 a mash object, create a ggplot of pairwise sharing of mash effects using
 \code{\link{get_pairwise_sharing}} and \code{\link{ggcorr}}.
 }
--- a/man/mash_plot_sig_by_condition.Rd
+++ b/man/mash_plot_sig_by_condition.Rd
@@ -0,0 +1,40 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mash-evaluation.R
 \name{mash_plot_sig_by_condition}
 \alias{mash_plot_sig_by_condition}
 \title{Significant SNPs per number of conditions}
 \usage{
 mash_plot_sig_by_condition(
  m,
  conditions = NA,
  saveoutput = FALSE,
  suffix = "",
  thresh = 0.05
 )
 }
 \arguments{
 \item{m}{An object of type mash}
 \item{conditions}{A vector of conditions. Get these with get_colnames(m).}
 \item{saveoutput}{Logical. Save plot output to a file? Default is FALSE.}
 \item{suffix}{Character. Optional. A unique suffix used to save the files,
 instead of the current date & time.}
 \item{thresh}{What is the threshold to call an effect significant? Default
 is 0.05.}
 }
 \value{
 A list containing a dataframe of the number of SNPs significant per
 number of conditions, and a ggplot object using that dataframe.
 }
 \description{
 For some number of columns in a mash object that correspond to
 conditions, find the number of SNPs that are significant for that number
 of conditions.
 }
 \examples{
  \dontrun{mash_plot_sig_by_condition(m = mash_obj, saveoutput = TRUE)}
 }
--- a/man/printf2.Rd
+++ b/man/printf2.Rd
@@ -0,0 +1,11 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/wrapper.R
 \name{printf2}
 \alias{printf2}
 \title{Verbose?}
 \usage{
 printf2(verbose, ...)
 }
 \description{
 Verbose?
 }
--- a/man/reorder_cormat.Rd
+++ b/man/reorder_cormat.Rd
@@ -0,0 +1,15 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mash-evaluation.R
 \name{reorder_cormat}
 \alias{reorder_cormat}
 \title{Reorder correlation matrix}
 \usage{
 reorder_cormat(cormat)
 }
 \arguments{
 \item{cormat}{A correlation matrix}
 }
 \description{
 Reorder correlation coefficients from a matrix of things
 (including NA's) and hierarchically cluster them
 }
--- a/man/scale_cov.Rd
+++ b/man/scale_cov.Rd
@@ -0,0 +1,23 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mash-evaluation.R
 \name{scale_cov}
 \alias{scale_cov}
 \title{Scale each covariance matrix in list Ulist by a scalar in
 vector grid}
 \usage{
 scale_cov(Ulist, grid)
 }
 \arguments{
 \item{Ulist}{a list of matrices}
 \item{grid}{a vector of scaling factors (standard deviaions)}
 }
 \value{
 a list with length length(Ulist)*length(grid)
 }
 \description{
 This is an internal (non-exported) function. This help
 page provides additional documentation mainly intended for
 developers and expert users.
 }
 \keyword{internal}