Error checks and plotting speedups for dive_phe2mash function

2021-03-31 18:17:02 -05:00
parent 027323acf6
commit 1edd72d15e
26 changed files with 779 additions and 63 deletions
--- a/man/dive_phe2mash.Rd
+++ b/man/dive_phe2mash.Rd
@@ -20,7 +20,8 @@ dive_phe2mash(
  scale.phe = TRUE,
  roll.size = 50,
  U.ed = NA,
-  U.hyp = NA
+  U.hyp = NA,
+  verbose = TRUE
 )
 }
 \arguments{
@@ -73,6 +74,8 @@ generating these once and reusing them for multiple mash runs can save time.}
 \item{U.hyp}{Other covariance matrices for mash. Specify these as a list. These
 matrices must have dimensions that match the number of phenotypes where
 univariate GWAS ran successfully.}
+
+\item{verbose}{Output some information on the iterations? Default is \code{TRUE}.}
 }
 \value{
 A mash object made up of all phenotypes where univariate GWAS ran
--- a/man/expand_cov.Rd
+++ b/man/expand_cov.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mash-evaluation.R
+\name{expand_cov}
+\alias{expand_cov}
+\title{Create expanded list of covariance matrices expanded by
+grid, Sigma_{lk} = omega_l U_k}
+\usage{
+expand_cov(Ulist, grid, usepointmass = TRUE)
+}
+\arguments{
+\item{Ulist}{a list of covariance matrices}
+
+\item{grid}{a grid of scalar values by which the covariance
+matrices are to be scaled}
+
+\item{usepointmass}{if TRUE adds a point mass at 0 (null component)
+to the list}
+}
+\value{
+This takes the covariance matrices in Ulist and multiplies
+them by the grid values. If usepointmass is TRUE then it adds a null
+component.
+}
+\description{
+This is an internal (non-exported) function. This help
+page provides additional documentation mainly intended for
+developers and expert users.
+}
+\keyword{internal}
--- a/man/get_GxE.Rd
+++ b/man/get_GxE.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mash-evaluation.R
+\name{get_GxE}
+\alias{get_GxE}
+\title{Get data frames of types of GxE from a mash object}
+\usage{
+get_GxE(m, factor = 0.4, thresh = 0.05)
+}
+\arguments{
+\item{m}{An object of type mash}
+
+\item{factor}{a number between 0 and 1. The factor within which effects are
+considered to be shared.}
+
+\item{thresh}{Numeric. The threshold for including an effect in the assessment}
+}
+\value{
+A list containing eight data frames. Those with names that start
+"S_" contain significant effects of different types between pairs of
+named rows and columns. S_all_pairwise contains all significant effects;
+NS_pairwise contains all non-significant effects. S_CN contains effects
+significant in only one condition, and effects with a significantly
+different magnitude (differential sensitivity). This dataframe is not
+conservative using the local false sign rate test - we can't determine
+the sign of one of the effects for effects significant in only one
+condition - so it's not recommended to use this, but included. S_2_no
+contains effects significant in both conditions that do not differ
+significantly in magnitude. These effects do not have GxE. S_AP contains
+effects significant in both conditions that differ in their sign - and
+have antagonistic pleiotropy. S_DS contains effects significant in both
+conditions that differ in the magnitude of their effect, but not their
+sign - differentially sensitive alleles. S_1_row and S_1_col contain
+effects that are significant in just one of the two conditions - the row
+or the column, respectively.
+}
+\description{
+Performs set operations to determine pairwise GxE for effects
+from a mash object.
+}
--- a/man/get_U_by_mass.Rd
+++ b/man/get_U_by_mass.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mash-evaluation.R
+\name{get_U_by_mass}
+\alias{get_U_by_mass}
+\title{Get the positions of objects in a mash object Ulist that are above
+some mass threshold.}
+\usage{
+get_U_by_mass(m, thresh = 0.05)
+}
+\arguments{
+\item{m}{An object of type mash}
+
+\item{thresh}{Numeric. The mass threshold for including a covariance matrix}
+}
+\description{
+Get the positions of objects in a mash object Ulist that are
+above some mass threshold.
+}
--- a/man/get_best_PC_df.Rd
+++ b/man/get_best_PC_df.Rd
@@ -2,8 +2,7 @@
 % Please edit documentation in R/wrapper.R
 \name{get_best_PC_df}
 \alias{get_best_PC_df}
-\title{Return best number of PCs in terms of lambda_GC for Panicum virgatum.
-Return best number of PCs in terms of lambda_GC for the CDBN.}
+\title{Return best number of PCs in terms of lambda_GC}
 \usage{
 get_best_PC_df(df)
 }
@@ -16,7 +15,7 @@ A dataframe containing the best lambda_GC value and number of PCs
 for each phenotype in the data frame.
 }
 \description{
-Given a dataframe created using pvdiv_lambda_GC, this function
+Given a dataframe created using div_lambda_GC, this function
 returns the first lambda_GC less than 1.05, or the smallest lambda_GC,
 for each column in the dataframe.
 }
--- a/man/get_colnames.Rd
+++ b/man/get_colnames.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mash-evaluation.R
+\name{get_colnames}
+\alias{get_colnames}
+\title{Get column names from a mash object}
+\usage{
+get_colnames(m)
+}
+\arguments{
+\item{m}{An object of type mash}
+}
+\value{
+A vector of phenotype names
+}
+\description{
+This function extracts the column names from the local false
+sign rate table of a mash object's results. This can tell you the condition
+names or phenotype names used in the mash object. That can be useful for
+looking at a subset of these columns, say.
+}
+\examples{
+    \dontrun{get_colnames(m = mash_obj)}
+
+}
--- a/man/get_date_filename.Rd
+++ b/man/get_date_filename.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mash-evaluation.R
+\name{get_date_filename}
+\alias{get_date_filename}
+\title{Get current date-time in a filename-appropriate format.}
+\usage{
+get_date_filename()
+}
+\value{
+A string containing the current date-time with spaces and colons
+replaced with underscores and periods, respectively.
+}
+\description{
+Converts the current \code{Sys.time()} system time to a format
+that is acceptable to include in a filename. Changes punctuation that
+won't work in a filename.
+}
--- a/man/get_estimated_pi.Rd
+++ b/man/get_estimated_pi.Rd
@@ -0,0 +1,38 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mash-evaluation.R
+\name{get_estimated_pi}
+\alias{get_estimated_pi}
+\title{Return the estimated mixture proportions. Use get_estimated_pi to
+extract the estimates of the mixture proportions for different types of
+covariance matrix. This tells you which covariance matrices have most of
+the mass.}
+\usage{
+get_estimated_pi(m, dimension = c("cov", "grid", "all"))
+}
+\arguments{
+\item{m}{the mash result}
+
+\item{dimension}{indicates whether you want the mixture proportions for the
+covariances, grid, or all}
+}
+\value{
+a named vector containing the estimated mixture proportions.
+}
+\description{
+Return the estimated mixture proportions. Use get_estimated_pi to
+extract the estimates of the mixture proportions for different types of
+covariance matrix. This tells you which covariance matrices have most of
+the mass.
+}
+\details{
+If the fit was done with \code{usepointmass=TRUE} then the first
+element of the returned vector will correspond to the null, and the
+remaining elements to the non-null covariance matrices. Suppose the fit
+was done with \eqn{K} covariances and a grid of length \eqn{L}. If
+\code{dimension=cov} then the returned vector will be of length \eqn{K}
+(or \eqn{K+1} if \code{usepointmass=TRUE}).  If \code{dimension=grid} then
+the returned vector will be of length \eqn{L} (or \eqn{L+1}).  If
+\code{dimension=all} then the returned vector will be of length \eqn{LK} (or
+\eqn{LK+1}). The names of the vector will be informative for which
+combination each element corresponds to.
+}
--- a/man/get_log10bf.Rd
+++ b/man/get_log10bf.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mash-evaluation.R
+\name{get_log10bf}
+\alias{get_log10bf}
+\title{Return the Bayes Factor for each effect}
+\usage{
+get_log10bf(m)
+}
+\arguments{
+\item{m}{the mash result (from joint or 1by1 analysis); must have been
+computed using usepointmass = TRUE}
+}
+\value{
+if m was fitted using usepointmass=TRUE then returns a vector of
+the log10(bf) values for each effect. That is, the jth element
+lbf_j is log10(Pr(Bj | g = ghat-nonnull)/Pr(Bj | g = 0)) where
+ghat-nonnull is the non-null part of ghat.  Otherwise returns NULL.
+}
+\description{
+Return the Bayes Factor for each effect
+}
--- a/man/get_marker_df.Rd
+++ b/man/get_marker_df.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mash-evaluation.R
+\name{get_marker_df}
+\alias{get_marker_df}
+\title{Get mash marker_df}
+\usage{
+get_marker_df(m, snp)
+}
+\arguments{
+\item{m}{An object of type mash}
+
+\item{snp}{A bigSNP object, produced by the bigsnpr package. Here, the WAMI
+SNP information.}
+}
+\description{
+Pulls SNP markers information in the mash object from a bigsnp
+object.
+}
--- a/man/get_n_significant_conditions.Rd
+++ b/man/get_n_significant_conditions.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mash-evaluation.R
+\name{get_n_significant_conditions}
+\alias{get_n_significant_conditions}
+\title{Count number of conditions each effect is significant in}
+\usage{
+get_n_significant_conditions(
+  m,
+  thresh = 0.05,
+  conditions = NULL,
+  sig_fn = get_lfsr
+)
+}
+\arguments{
+\item{m}{the mash result (from joint or 1by1 analysis)}
+
+\item{thresh}{indicates the threshold below which to call signals significant}
+
+\item{conditions}{which conditions to include in check (default to all)}
+
+\item{sig_fn}{the significance function used to extract significance from mash object; eg could be ashr::get_lfsr or ashr::get_lfdr}
+}
+\value{
+a vector containing the number of significant conditions
+}
+\description{
+Count number of conditions each effect is significant in
+}
--- a/man/get_ncond.Rd
+++ b/man/get_ncond.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mash-evaluation.R
+\name{get_ncond}
+\alias{get_ncond}
+\title{Get number of conditions}
+\usage{
+get_ncond(m)
+}
+\arguments{
+\item{m}{The mash result}
+}
+\description{
+Get number of conditions
+}
--- a/man/get_pairwise_sharing.Rd
+++ b/man/get_pairwise_sharing.Rd
@@ -0,0 +1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mash-evaluation.R
+\name{get_pairwise_sharing}
+\alias{get_pairwise_sharing}
+\title{Compute the proportion of (significant) signals shared by magnitude in each pair of conditions, based on the posterior mean}
+\usage{
+get_pairwise_sharing(m, factor = 0.5, lfsr_thresh = 0.05, FUN = identity)
+}
+\arguments{
+\item{m}{the mash fit}
+
+\item{factor}{a number between 0 and 1 - the factor within which effects are
+considered to be shared.}
+
+\item{lfsr_thresh}{the lfsr threshold for including an effect in the
+assessment}
+
+\item{FUN}{a function to be applied to the estimated effect sizes before
+assessing sharing. The most obvious choice beside the default
+'FUN=identity' would be 'FUN=abs' if you want to ignore the sign of the
+effects when assessing sharing.}
+}
+\description{
+Compute the proportion of (significant) signals shared by magnitude in each pair of conditions, based on the posterior mean
+}
+\details{
+For each pair of tissues, first identify the effects that are
+significant (by lfsr<lfsr_thresh) in at least one of the two tissues.
+Then compute what fraction of these have an estimated (posterior mean)
+effect size within a factor \code{factor} of one another. The results are
+returned as an R by R matrix.
+}
+\examples{
+\dontrun{
+get_pairwise_sharing(m) # sharing by magnitude (same sign)
+get_pairwise_sharing(m, factor=0) # sharing by sign
+get_pairwise_sharing(m, FUN=abs) # sharing by magnitude when sign is ignored
+}
+
+}
--- a/man/get_significant_results.Rd
+++ b/man/get_significant_results.Rd
@@ -0,0 +1,42 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mash-evaluation.R
+\name{get_significant_results}
+\alias{get_significant_results}
+\title{From a mash result, get effects that are significant in at least
+one condition.}
+\usage{
+get_significant_results(
+  m,
+  thresh = 0.05,
+  conditions = NULL,
+  sig_fn = ashr::get_lfsr
+)
+
+get_significant_results(
+  m,
+  thresh = 0.05,
+  conditions = NULL,
+  sig_fn = ashr::get_lfsr
+)
+}
+\arguments{
+\item{m}{the mash result (from joint or 1by1 analysis)}
+
+\item{thresh}{indicates the threshold below which to call signals significant}
+
+\item{conditions}{which conditions to include in check (default to all)}
+
+\item{sig_fn}{the significance function used to extract significance from mash object; eg could be ashr::get_lfsr or ashr::get_lfdr. (Small values must indicate significant.)}
+}
+\value{
+a vector containing the indices of the significant effects, by
+order of most significant to least
+
+a vector containing the indices of the significant effects, by order of most significant to least
+}
+\description{
+From a mash result, get effects that are significant in at least
+one condition.
+
+From a mash result, get effects that are significant in at least one condition
+}
--- a/man/mash_plot_Ulist.Rd
+++ b/man/mash_plot_Ulist.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mash-evaluation.R
+\name{mash_plot_Ulist}
+\alias{mash_plot_Ulist}
+\title{ggplot of specific covariance matrix patterns}
+\usage{
+mash_plot_Ulist(
+  m,
+  range = NA,
+  saveoutput = FALSE,
+  suffix = "",
+  limits = TRUE,
+  labels = TRUE
+)
+}
+\arguments{
+\item{m}{An object of type mash}
+
+\item{range}{Numeric vector. Which covariance matrices should be plotted?}
+
+\item{saveoutput}{Logical. Should the output be saved to the path?}
+
+\item{suffix}{Character. Optional. A unique suffix used to save the files,
+instead of the current date & time.}
+
+\item{limits}{should there be plot limits of -1 and 1? Default is true.}
+}
+\value{
+A list of dataframes used to make the tile plots and the plots
+themselves.
+}
+\description{
+Creates a tile plot using ggplot of the covariance matrices
+specified in the mash model.
+}
--- a/man/mash_plot_covar.Rd
+++ b/man/mash_plot_covar.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mash-evaluation.R
+\name{mash_plot_covar}
+\alias{mash_plot_covar}
+\title{ggplot of covariance matrix masses}
+\usage{
+mash_plot_covar(m, saveoutput = FALSE, suffix = "")
+}
+\arguments{
+\item{m}{An object of type mash}
+
+\item{saveoutput}{Logical. Should the output be saved to the path?}
+
+\item{suffix}{Character. Optional. A unique suffix used to save the files,
+instead of the current date & time.}
+}
+\description{
+Creates a bar plot using ggplot of the masses that are on each
+covariance matrix specified in the mash model.
+}
+\note{
+This plot can be useful for seeing the overall patterns of effects in
+the data used in mash. Non-significant effects will add mass to the
+"no_effects" covariance matrix, while significant effects will add mass
+to one of the other covariance matrices. You can use mash_plot_Ulist()
+to plot the covariance matrix patterns themselves.
+}
--- a/man/mash_plot_manhattan_by_condition.Rd
+++ b/man/mash_plot_manhattan_by_condition.Rd
@@ -0,0 +1,47 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mash-evaluation.R
+\name{mash_plot_manhattan_by_condition}
+\alias{mash_plot_manhattan_by_condition}
+\title{Manhattan plot in ggplot colored by significant conditions}
+\usage{
+mash_plot_manhattan_by_condition(
+  m,
+  snp,
+  cond = NA,
+  saveoutput = FALSE,
+  suffix = "",
+  thresh = 0.05
+)
+}
+\arguments{
+\item{m}{A mash object (outputted by mash).}
+
+\item{snp}{A bigSNP object, produced by the bigsnpr package. Here,
+the WAMI SNP information.}
+
+\item{cond}{A vector of phenotypes. Defaults to the names of each
+column in the mash object.}
+
+\item{saveoutput}{Logical. Should the output be saved to the path?}
+
+\item{suffix}{Character. Optional. A unique suffix used to save the files,
+instead of the current date & time.}
+
+\item{thresh}{Numeric. The threshold used for the local false sign rate to
+call significance in a condition.}
+}
+\value{
+A \code{tbl_df()} of the data used to make the Manhattan plot, and a
+ggplot object containing the Manhattan.
+}
+\description{
+Takes a mash object and, for some vector of phenotypes, returns
+a Manhattan plot ggplot object (and its dataframe). Each SNP in the plot
+is colored by the number of phenotypes it is significant for. Even and
+odd chromosomes have different shapes for their SNPs, so that
+chromosome identity can be determined.
+}
+\examples{
+\dontrun{manhattan_out <- mash_plot_manhattan_by_condition(m = m, snp = snp, saveoutput = TRUE)}
+
+}
--- a/man/mash_plot_marker_effect.Rd
+++ b/man/mash_plot_marker_effect.Rd
@@ -0,0 +1,44 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mash-evaluation.R
+\name{mash_plot_marker_effect}
+\alias{mash_plot_marker_effect}
+\title{ggplot of single mash effect}
+\usage{
+mash_plot_marker_effect(
+  m,
+  snp = snp,
+  n = NA,
+  i = NA,
+  marker = TRUE,
+  saveoutput = FALSE,
+  suffix = ""
+)
+}
+\arguments{
+\item{m}{An object of type mash}
+
+\item{snp}{A bigSNP object, produced by the bigsnpr package. Here,
+the WAMI SNP information.}
+
+\item{n}{Optional. Integer or integer vector. The result number to plot, in
+order of significance. 1 would be the top result, for example. Find
+these with \code{\link{get_significant_results}}.}
+
+\item{i}{Optional. Integer or integer vector. The result number to plot, in
+the order of the mash object. 1 would be the first marker in the mash
+object, for example. Find these with \code{\link{get_marker_df}}.}
+
+\item{marker}{Optional. Print the marker name on the plot?}
+
+\item{saveoutput}{Logical. Should the output be saved to the path?}
+
+\item{suffix}{Character. Optional. A unique suffix used to save the files,
+instead of the current date & time.}
+}
+\description{
+Creates a plot with point estimates and standard errors for
+effects of a single SNP in multiple conditions.
+}
+\note{
+Specify only one of n or i.
+}
--- a/man/mash_plot_pairwise_sharing.Rd
+++ b/man/mash_plot_pairwise_sharing.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mash-evaluation.R
+\name{mash_plot_pairwise_sharing}
+\alias{mash_plot_pairwise_sharing}
+\title{Create a ggplot of pairwise sharing of mash effects}
+\usage{
+mash_plot_pairwise_sharing(
+  m = NULL,
+  effectRDS = NULL,
+  corrmatrix = NULL,
+  reorder = TRUE,
+  saveoutput = FALSE,
+  filename = NA,
+  suffix = "",
+  ...
+)
+}
+\arguments{
+\item{m}{An object of type mash}
+
+\item{effectRDS}{An RDS containing a correlation matrix.}
+
+\item{corrmatrix}{A correlation matrix}
+
+\item{reorder}{Logical. Should the columns be reordered by similarity?}
+
+\item{saveoutput}{Logical. Should the output be saved to the path?}
+
+\item{filename}{Character string with an output filename. Optional.}
+
+\item{suffix}{Character. Optional. A unique suffix used to save the files,
+instead of the current date & time.}
+
+\item{...}{Other arguments to \code{\link{get_pairwise_sharing}} or
+\code{\link{ggcorr}}.}
+}
+\value{
+A list containing a dataframe containing the correlations and a
+ggplot2 object containing the correlation plot.
+}
+\description{
+Given a correlation matrix, an RDS with a correlation matrix, or
+a mash object, create a ggplot of pairwise sharing of mash effects using
+\code{\link{get_pairwise_sharing}} and \code{\link{ggcorr}}.
+}
--- a/man/mash_plot_sig_by_condition.Rd
+++ b/man/mash_plot_sig_by_condition.Rd
@@ -0,0 +1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mash-evaluation.R
+\name{mash_plot_sig_by_condition}
+\alias{mash_plot_sig_by_condition}
+\title{Significant SNPs per number of conditions}
+\usage{
+mash_plot_sig_by_condition(
+  m,
+  conditions = NA,
+  saveoutput = FALSE,
+  suffix = "",
+  thresh = 0.05
+)
+}
+\arguments{
+\item{m}{An object of type mash}
+
+\item{conditions}{A vector of conditions. Get these with get_colnames(m).}
+
+\item{saveoutput}{Logical. Save plot output to a file? Default is FALSE.}
+
+\item{suffix}{Character. Optional. A unique suffix used to save the files,
+instead of the current date & time.}
+
+\item{thresh}{What is the threshold to call an effect significant? Default
+is 0.05.}
+}
+\value{
+A list containing a dataframe of the number of SNPs significant per
+number of conditions, and a ggplot object using that dataframe.
+}
+\description{
+For some number of columns in a mash object that correspond to
+conditions, find the number of SNPs that are significant for that number
+of conditions.
+}
+\examples{
+  \dontrun{mash_plot_sig_by_condition(m = mash_obj, saveoutput = TRUE)}
+
+}
--- a/man/printf2.Rd
+++ b/man/printf2.Rd
@@ -0,0 +1,11 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/wrapper.R
+\name{printf2}
+\alias{printf2}
+\title{Verbose?}
+\usage{
+printf2(verbose, ...)
+}
+\description{
+Verbose?
+}
--- a/man/reorder_cormat.Rd
+++ b/man/reorder_cormat.Rd
@@ -0,0 +1,15 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mash-evaluation.R
+\name{reorder_cormat}
+\alias{reorder_cormat}
+\title{Reorder correlation matrix}
+\usage{
+reorder_cormat(cormat)
+}
+\arguments{
+\item{cormat}{A correlation matrix}
+}
+\description{
+Reorder correlation coefficients from a matrix of things
+(including NA's) and hierarchically cluster them
+}
--- a/man/scale_cov.Rd
+++ b/man/scale_cov.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mash-evaluation.R
+\name{scale_cov}
+\alias{scale_cov}
+\title{Scale each covariance matrix in list Ulist by a scalar in
+vector grid}
+\usage{
+scale_cov(Ulist, grid)
+}
+\arguments{
+\item{Ulist}{a list of matrices}
+
+\item{grid}{a vector of scaling factors (standard deviations)}
+}
+\value{
+a list with length length(Ulist)*length(grid)
+}
+\description{
+This is an internal (non-exported) function. This help
+page provides additional documentation mainly intended for
+developers and expert users.
+}
+\keyword{internal}