From 1edd72d15e0a0ba87be0fcc05545c5dd9078e72e Mon Sep 17 00:00:00 2001 From: Alice MacQueen Date: Wed, 31 Mar 2021 18:17:02 -0500 Subject: [PATCH] Error checks and plotting speedups for dive_phe2mash function --- DESCRIPTION | 5 + NAMESPACE | 25 ++++ R/wrapper.R | 187 ++++++++++++++++-------- man/dive_phe2mash.Rd | 5 +- man/expand_cov.Rd | 29 ++++ man/get_GxE.Rd | 39 +++++ man/get_U_by_mass.Rd | 18 +++ man/get_best_PC_df.Rd | 5 +- man/get_colnames.Rd | 24 +++ man/get_date_filename.Rd | 17 +++ man/get_estimated_pi.Rd | 38 +++++ man/get_log10bf.Rd | 21 +++ man/get_marker_df.Rd | 18 +++ man/get_n_significant_conditions.Rd | 28 ++++ man/get_ncond.Rd | 14 ++ man/get_pairwise_sharing.Rd | 40 +++++ man/get_significant_results.Rd | 42 ++++++ man/mash_plot_Ulist.Rd | 35 +++++ man/mash_plot_covar.Rd | 27 ++++ man/mash_plot_manhattan_by_condition.Rd | 47 ++++++ man/mash_plot_marker_effect.Rd | 44 ++++++ man/mash_plot_pairwise_sharing.Rd | 45 ++++++ man/mash_plot_sig_by_condition.Rd | 40 +++++ man/printf2.Rd | 11 ++ man/reorder_cormat.Rd | 15 ++ man/scale_cov.Rd | 23 +++ 26 files changed, 779 insertions(+), 63 deletions(-) create mode 100644 man/expand_cov.Rd create mode 100644 man/get_GxE.Rd create mode 100644 man/get_U_by_mass.Rd create mode 100644 man/get_colnames.Rd create mode 100644 man/get_date_filename.Rd create mode 100644 man/get_estimated_pi.Rd create mode 100644 man/get_log10bf.Rd create mode 100644 man/get_marker_df.Rd create mode 100644 man/get_n_significant_conditions.Rd create mode 100644 man/get_ncond.Rd create mode 100644 man/get_pairwise_sharing.Rd create mode 100644 man/get_significant_results.Rd create mode 100644 man/mash_plot_Ulist.Rd create mode 100644 man/mash_plot_covar.Rd create mode 100644 man/mash_plot_manhattan_by_condition.Rd create mode 100644 man/mash_plot_marker_effect.Rd create mode 100644 man/mash_plot_pairwise_sharing.Rd create mode 100644 man/mash_plot_sig_by_condition.Rd create mode 100644 man/printf2.Rd create mode 100644 man/reorder_cormat.Rd create mode 100644 man/scale_cov.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 8ce25de..2a4729c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,10 +15,13 @@ Roxygen: list(markdown = TRUE) RoxygenNote: 7.1.1 Imports: ashr, + bigassertr, bigsnpr, bigstatsr, + cluster, cowplot, dplyr, + GGally, ggplot2, mashr, magrittr, @@ -26,6 +29,8 @@ Imports: purrr, readr, rlang (>= 0.1.2), + rlist, + stringr, tibble, tidyr, tidyselect diff --git a/NAMESPACE b/NAMESPACE index 0ec7dd1..b6ba566 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,25 +11,44 @@ export(dive_phe2mash) export(enquo) export(enquos) export(expr) +export(get_GxE) +export(get_U_by_mass) export(get_lambdagc) +export(get_pairwise_sharing) export(get_qqplot) +export(get_significant_results) +export(mash_plot_Ulist) +export(mash_plot_covar) +export(mash_plot_manhattan_by_condition) +export(mash_plot_marker_effect) +export(mash_plot_pairwise_sharing) +export(mash_plot_sig_by_condition) export(sym) export(syms) import(bigsnpr) import(bigstatsr) import(ggplot2) import(mashr) +importFrom(GGally,ggcorr) importFrom(ashr,get_fitted_g) +importFrom(ashr,get_lfsr) +importFrom(ashr,get_pm) +importFrom(ashr,get_psd) +importFrom(bigassertr,printf) importFrom(bigsnpr,snp_autoSVD) +importFrom(cluster,daisy) importFrom(cowplot,save_plot) importFrom(dplyr,arrange) +importFrom(dplyr,between) importFrom(dplyr,case_when) +importFrom(dplyr,desc) importFrom(dplyr,filter) importFrom(dplyr,full_join) importFrom(dplyr,group_by) importFrom(dplyr,left_join) importFrom(dplyr,mutate) importFrom(dplyr,mutate_if) +importFrom(dplyr,n) importFrom(dplyr,rename) importFrom(dplyr,select) importFrom(dplyr,slice) @@ -52,11 +71,15 @@ importFrom(rlang,enquos) importFrom(rlang,expr) importFrom(rlang,sym) importFrom(rlang,syms) +importFrom(rlist,list.append) +importFrom(stats,hclust) importFrom(stats,median) importFrom(stats,ppoints) importFrom(stats,predict) importFrom(stats,qbeta) importFrom(stats,uniroot) +importFrom(stringr,str_replace) +importFrom(stringr,str_replace_all) importFrom(tibble,add_column) importFrom(tibble,add_row) importFrom(tibble,as_tibble) @@ -64,6 +87,8 @@ importFrom(tibble,enframe) importFrom(tibble,rownames_to_column) importFrom(tibble,tibble) importFrom(tidyr,gather) +importFrom(tidyr,pivot_longer) importFrom(tidyr,replace_na) +importFrom(tidyr,separate) importFrom(tidyselect,all_of) importFrom(utils,tail) diff --git a/R/wrapper.R b/R/wrapper.R index 03a2fb9..176266f 100644 --- a/R/wrapper.R +++ b/R/wrapper.R @@ -42,6 +42,7 @@ #' @param U.hyp Other covariance matrices for mash. Specify these as a list. These #' matrices must have dimensions that match the number of phenotypes where #' univariate GWAS ran successfully. +#' @param verbose Output some information on the iterations? Default is `TRUE`. #' #' @return A mash object made up of all phenotypes where univariate GWAS ran #' successfully. @@ -56,6 +57,7 @@ #' @importFrom tidyr replace_na #' @importFrom matrixStats colMaxs rowMaxs #' @importFrom stats predict +#' @importFrom bigassertr printf #' #' @export dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "", @@ -64,7 +66,7 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "", thr.m = c("sum", "max"), num.strong = 1000, num.random = NA, scale.phe = TRUE, roll.size = 50, U.ed = NA, - U.hyp = NA){ + U.hyp = NA, verbose = TRUE){ # 1. Stop if not functions. ---- if (attr(snp, "class") != "bigSNP") { stop("snp needs to be a bigSNP object, produced by the bigsnpr package.") @@ -80,6 +82,9 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "", } else { type <- rep(type, ncol(df) - 1) } + if (!dir.exists(outputdir)) { + dir.create(outputdir) + } ## 1a. Generate useful values ---- G <- snp$genotypes @@ -94,12 +99,13 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "", bonferroni <- -log10(0.05/length(snp$map$physical.pos)) markers <- tibble(CHR = snp$map$chromosome, POS = snp$map$physical.pos, marker.ID = snp$map$marker.ID) %>% - mutate(CHRN = as.numeric(as.factor(.data$CHR))) + mutate(CHRN = as.numeric(as.factor(.data$CHR)), + CHR = as.factor(.data$CHR)) # 2. Pop Structure Correction ---- if (is.null(svd)) { - message(paste0("Covariance matrix (svd) was not supplied - this will be", - " generated using snp_autoSVD().")) + printf2(verbose = verbose, "\nCovariance matrix (svd) was not supplied - ") + printf2(verbose = verbose, "\nthis will be generated using snp_autoSVD()") svd <- snp_autoSVD(G = G, infos.chr = markers$CHRN, infos.pos = markers$POS, k = 10, thr.r2 = thr.r2, roll.size = roll.size) } else { @@ -107,6 +113,7 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "", } pc_max <- ncol(svd$u) gwas_ok <- c() + first_gwas_ok <- FALSE for (i in 2:ncol(df)) { df1 <- df %>% @@ -127,8 +134,10 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "", gwas_ok[i-1] <- check_gwas(df1 = df1, phename = phename, type = type[i-1], nPhe = nPhe, minphe = min.phe, nLev = nLev) + # Find best # PCs to correct for population structure for each phenotype. if(gwas_ok[i-1]){ + lambdagc_df <- div_lambda_GC(df = df1, type = type[i-1], snp = snp, svd = svd, npcs = c(0:pc_max)) PC_df <- get_best_PC_df(lambdagc_df) @@ -145,29 +154,22 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "", nsnp = nSNP, npcs = PC_df$NumPCs, nphe = nPhe, nlev = nLev, lambda_GC = PC_df$lambda_GC, bonferroni = bonferroni) - # plot Manhattan and QQ if save.plots == TRUE - if(save.plots == TRUE){ + + # plot QQ if save.plots == TRUE + if (save.plots == TRUE) { qqplot <- get_qqplot(ps = gwas$pvalue, lambdaGC = TRUE) - manhattan <- get_manhattan(log10p = gwas$log10p, snp = snp, - thresh = bonferroni) # could round these too - plotname <- paste0(gwas_data$phe, "_", gwas_data$type, "_model_", - gwas_data$nphe, "g_", gwas_data$nsnp, "_SNPs_", - gwas_data$npcs, "_PCs.png") - save_plot(filename = file.path(outputdir, paste0("QQplot_", plotname)), - plot = qqplot, base_asp = 1, base_height = 4) - save_plot(filename = file.path(outputdir, paste0("Manhattan_", plotname)), - plot = manhattan, base_asp = 2.1, base_height = 3.5) - } + } # save gwas outputs together in a fbm gwas <- gwas %>% select(.data[["estim"]], .data[["std.err"]], .data[["log10p"]]) - if(i == 2){ # save .bk and .rds file the first time through the loop. + if(!first_gwas_ok){ # save .bk and .rds file the first time through the loop. if (!grepl("_$", suffix) & suffix != ""){ suffix <- paste0("_", suffix) } - fbm.name <- paste0(outputdir, "gwas_effects", suffix) + first_gwas_ok <- TRUE + fbm.name <- file.path(outputdir, paste0("gwas_effects", suffix)) colnames_fbm <- c(paste0(phename, "_Effect"), paste0(phename, "_SE"), paste0(phename, "_log10p")) @@ -179,18 +181,43 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "", colnames_fbm <- c(colnames_fbm, paste0(phename, "_Effect"), paste0(phename, "_SE"), paste0(phename, "_log10p")) gwas2$add_columns(ncol_add = 3) - gwas2[,c(i*3-5, i*3-4, i*3-3)] <- gwas + gwas2[, c(sum(gwas_ok)*3 - 2, sum(gwas_ok)*3 - 1, + sum(gwas_ok)*3)] <- gwas gwas2$save() - gwas_metadata <- add_row(gwas_metadata, phe = phename, type = type[i-1], + gwas_metadata <- add_row(gwas_metadata, phe = phename, type = type[i - 1], nsnp = nSNP, npcs = PC_df$NumPCs, nphe = nPhe, nlev = nLev, lambda_GC = PC_df$lambda_GC, bonferroni = bonferroni) } + # plot Manhattan and QQ if save.plots == TRUE + if (save.plots == TRUE) { + # set aspect ratio based on number of SNPs in snp file + asp <- log10(snp$genotypes$ncol)/2 + if(asp < 1.1){ + asp <- 1.1 + } + + manhattan <- get_manhattan(X = gwas2, ind = sum(gwas_ok)*3, snp = snp, + thresh = bonferroni) + plotname <- paste0(gwas_data$phe, "_", gwas_data$type, "_model_", + gwas_data$nphe, "g_", gwas_data$nsnp, "_SNPs_", + gwas_data$npcs, "_PCs.png") + save_plot(filename = file.path(outputdir, paste0("QQplot_", plotname)), + plot = qqplot, base_asp = 1, base_height = 4) + save_plot(filename = file.path(outputdir, paste0("Manhattan_", plotname)), + plot = manhattan, base_asp = asp, base_height = 3.75) + + } rm(gwas) - message(paste0("Finished phenotype ", i-1, ": ", names(df)[i])) + printf2(verbose = verbose, "\nFinished GWAS on phenotype %s. ", + names(df)[i]) + } else { + printf2(verbose = verbose, "\nSkipping GWAS on phenotype %s. ", + names(df)[i]) } } + printf2(verbose = verbose, "\nNow preparing gwas effects for use in mash.\n") # 4. mash input ---- ## prioritize effects with max(log10p) or max(sum(log10p)) ## make a random set of relatively unlinked SNPs @@ -198,7 +225,7 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "", ind_se <- (1:sum(gwas_ok))*3 - 1 ind_p <- (1:sum(gwas_ok))*3 - if(thr.m == "sum"){ + if (thr.m == "sum") { thr_log10p <- big_apply(gwas2, a.FUN = function(X, ind) rowSums(X[, ind]), ind = ind_p, @@ -211,21 +238,21 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "", } gwas2$add_columns(ncol_add = 1) colnames_fbm <- c(colnames_fbm, paste0(thr.m, "_thr_log10p")) - gwas2[,(sum(gwas_ok)*3+1)] <- thr_log10p + gwas2[,(sum(gwas_ok)*3 + 1)] <- thr_log10p gwas2$save() ## replace NA or Nan values # Replace SE with 1's, estimates and p values with 0's. replace_na_1 <- function(X, ind) replace_na(X[, ind], 1) replace_na_0 <- function(X, ind) replace_na(X[, ind], 0) - gwas2[,ind_se] <- big_apply(gwas2, a.FUN = replace_na_1, ind = ind_se, + gwas2[, ind_se] <- big_apply(gwas2, a.FUN = replace_na_1, ind = ind_se, a.combine = 'plus') - gwas2[,ind_estim] <- big_apply(gwas2, a.FUN = replace_na_0, ind = ind_estim, + gwas2[, ind_estim] <- big_apply(gwas2, a.FUN = replace_na_0, ind = ind_estim, a.combine = 'plus') - gwas2[,ind_p] <- big_apply(gwas2, a.FUN = replace_na_0, ind = ind_p, + gwas2[, ind_p] <- big_apply(gwas2, a.FUN = replace_na_0, ind = ind_p, a.combine = 'plus') - gwas2[,(sum(gwas_ok)*3+1)] <- big_apply(gwas2, a.FUN = replace_na_0, - ind = (sum(gwas_ok)*3+1), + gwas2[, (sum(gwas_ok)*3+1)] <- big_apply(gwas2, a.FUN = replace_na_0, + ind = (sum(gwas_ok)*3 + 1), a.combine = 'plus') gwas2$save() @@ -301,31 +328,26 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "", # 5. mash ---- data_r <- mashr::mash_set_data(Bhat_random, Shat_random) - message(paste0("Estimating the correlation structure in the null tests from ", - "the random data. - (not the strong data because it will not necessarily contain - any null tests).")) + printf2(verbose = verbose, "\nEstimating correlation structure in the null tests from a random sample of clumped data.") Vhat <- mashr::estimate_null_correlation_simple(data = data_r) - message(paste0("Setting up the main data objects with this correlation ", - "structure in place.")) data_strong <- mashr::mash_set_data(Bhat_strong, Shat_strong, V = Vhat) data_random <- mashr::mash_set_data(Bhat_random, Shat_random, V = Vhat) data_full <- mashr::mash_set_data(Bhat_full, Shat_full, V = Vhat) U_c <- mashr::cov_canonical(data_random) - if (!is.na(U.ed[1])) { - message(paste0("Now estimating data-driven covariances using the strong", - " tests. - NB: This step may take some time to complete.")) - if (length(ind_p) < 6) { - cov_npc <- ind_p - 1 - } else { - cov_npc <- 5 - } - U_pca = mashr::cov_pca(data_strong, npc = cov_npc) - U_ed = mashr::cov_ed(data_strong, U_pca) - saveRDS(U_ed, file = paste0(outputdir, "Mash_U_ed", suffix, ".rds")) + if (is.na(U.ed[1])) { + printf2(verbose = verbose, "\nNow estimating data-driven covariances using + the strong tests. NB: This step may take some time to complete.\n") + if (length(ind_p) < 6) { + cov_npc <- ind_p - 1 + } else { + cov_npc <- 5 + } + U_pca = mashr::cov_pca(data_strong, npc = cov_npc) + U_ed = mashr::cov_ed(data_strong, U_pca) + saveRDS(U_ed, file = file.path(outputdir, paste0("Mash_U_ed", suffix, + ".rds"))) } else if (typeof(U.ed) == "list") { U_ed <- U.ed } else if (typeof(U.ed) == "character") { @@ -337,12 +359,15 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "", if (typeof(U.hyp) == "list") { m = mashr::mash(data_random, Ulist = c(U_ed, U_c, U.hyp), outputlevel = 1) - } else { + } else if (typeof(U.hyp) == "character") { + U_hyp <- readRDS(file = U.hyp) + m = mashr::mash(data_random, Ulist = c(U_ed, U_c, U_hyp), outputlevel = 1) + } else { m = mashr::mash(data_random, Ulist = c(U_ed, U_c), outputlevel = 1) + printf2(verbose = verbose, "\nNo user-specified covariance matrices were included in the mash fit.") } - message(paste0("Compute posterior matrices for all effects", - " using the mash fit from the - random tests.")) + printf2(verbose = verbose, "\nComputing posterior weights for all effects + using the mash fit from the random tests.") m2 = mashr::mash(data_full, g = ashr::get_fitted_g(m), fixg = TRUE) return(m2) @@ -429,6 +454,9 @@ div_gwas <- function(df, snp, type, svd, npcs){ return(gwaspc) } +#' Verbose? +#' @importFrom bigassertr printf +printf2 <- function(verbose, ...) if (verbose) { printf(...) } #' Create a quantile-quantile plot with ggplot2. #' @@ -544,17 +572,27 @@ round_xy <- function(x, y, cl = NA, cu = NA, roundby = 0.001){ } } -get_manhattan <- function(log10p, snp, thresh){ +get_manhattan <- function(X, ind, snp, thresh){ + roundFBM <- function(X, ind, at) ceiling(X[, ind] / at) * at + observed <- big_apply(X, ind = ind, a.FUN = roundFBM, at = 0.01, + a.combine = 'plus') + plot_data <- tibble(CHR = snp$map$chromosome, POS = snp$map$physical.pos, - marker.ID = snp$map$marker.ID, log10p = log10p) %>% - mutate(observed = round2(.data$log10p, at = 0.001)) %>% + marker.ID = snp$map$marker.ID, observed = observed) + + if (length(unique(snp$map$physical.pos)) >= 500000) { + plot_data <- plot_data %>% + mutate(POS = round2(.data$POS, at = 250000)) + } + plot_data <- plot_data %>% group_by(.data$CHR, .data$POS, .data$observed) %>% - slice(1) + slice(1) %>% + mutate(CHR = as.factor(.data$CHR)) nchr <- length(unique(plot_data$CHR)) p1 <- plot_data %>% - ggplot(aes(x = .data$POS, y = .data$log10p)) + + ggplot(aes(x = .data$POS, y = .data$observed)) + geom_point(aes(color = .data$CHR, fill = .data$CHR)) + geom_hline(yintercept = thresh, color = "black", linetype = 2, size = 1) + @@ -583,7 +621,7 @@ get_manhattan <- function(log10p, snp, thresh){ strip.text = element_text(hjust = 0.5, size = 10 ,vjust = 0), strip.placement = 'outside', panel.spacing.x = unit(-0.1, 'cm')) + labs(x = "Chromosome", y = "-log10(p value)") + - scale_x_continuous(expand = c(0.2, 0.2)) + scale_x_continuous(expand = c(0.15, 0.15)) return(p1) } @@ -737,10 +775,9 @@ get_lambdagc <- function(ps, tol = 1e-8){ } -#' Return best number of PCs in terms of lambda_GC for Panicum virgatum. -#' Return best number of PCs in terms of lambda_GC for the CDBN. +#' Return best number of PCs in terms of lambda_GC #' -#' @description Given a dataframe created using pvdiv_lambda_GC, this function +#' @description Given a dataframe created using div_lambda_GC, this function #' returns the first lambda_GC less than 1.05, or the smallest lambda_GC, #' for each column in the dataframe. #' @@ -812,3 +849,35 @@ check_gwas <- function(df1, phename, type, nPhe, minphe, nLev){ } return(gwas_ok) } + + +## @title Basic sanity check for covariance matrices +## @param X input matrix +check_covmat_basics = function(x) { + label = substitute(x) + if (!is.matrix(x)) + labelled_stop(label, "is not a matrix") + if (!is.numeric(x)) + labelled_stop(label, "is not a numeric matrix") + if (any(is.na(x))) + labelled_stop(label, "cannot contain NA values") + if (any(is.infinite(x))) + labelled_stop(label, "cannot contain Inf values") + if (any(is.nan(x))) + labelled_stop(label, "cannot contain NaN values") + if (nrow(x) != ncol(x)) + labelled_stop(label, "is not a square matrix") + if (!isSymmetric(x, check.attributes = FALSE)) + labelled_stop(label, "is not a symmetric matrix") + return(TRUE) +} + +## @title check matrix for positive definitness +## @param X input matrix +check_positive_definite = function(x) { + check_covmat_basics(x) + tryCatch(chol(x), + error = function(e) labelled_stop(substitute(x), + "must be positive definite")) + return(TRUE) +} diff --git a/man/dive_phe2mash.Rd b/man/dive_phe2mash.Rd index cfe1422..6f62b47 100644 --- a/man/dive_phe2mash.Rd +++ b/man/dive_phe2mash.Rd @@ -20,7 +20,8 @@ dive_phe2mash( scale.phe = TRUE, roll.size = 50, U.ed = NA, - U.hyp = NA + U.hyp = NA, + verbose = TRUE ) } \arguments{ @@ -73,6 +74,8 @@ generating these once and reusing them for multiple mash runs can save time.} \item{U.hyp}{Other covariance matrices for mash. Specify these as a list. These matrices must have dimensions that match the number of phenotypes where univariate GWAS ran successfully.} + +\item{verbose}{Output some information on the iterations? Default is \code{TRUE}.} } \value{ A mash object made up of all phenotypes where univariate GWAS ran diff --git a/man/expand_cov.Rd b/man/expand_cov.Rd new file mode 100644 index 0000000..d7661bf --- /dev/null +++ b/man/expand_cov.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mash-evaluation.R +\name{expand_cov} +\alias{expand_cov} +\title{Create expanded list of covariance matrices expanded by +grid, Sigma_{lk} = omega_l U_k} +\usage{ +expand_cov(Ulist, grid, usepointmass = TRUE) +} +\arguments{ +\item{Ulist}{a list of covarance matrices} + +\item{grid}{a grid of scalar values by which the covariance +matrices are to be sc} + +\item{usepointmass}{if TRUE adds a point mass at 0 (null component) +to the list} +} +\value{ +This takes the covariance matrices in Ulist and multiplies +them by the grid values If usepointmass is TRUE then it adds a null +component. +} +\description{ +This is an internal (non-exported) function. This help +page provides additional documentation mainly intended for +developers and expert users. +} +\keyword{internal} diff --git a/man/get_GxE.Rd b/man/get_GxE.Rd new file mode 100644 index 0000000..8001ce4 --- /dev/null +++ b/man/get_GxE.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mash-evaluation.R +\name{get_GxE} +\alias{get_GxE} +\title{Get data frames of types of GxE from a mash object} +\usage{ +get_GxE(m, factor = 0.4, thresh = 0.05) +} +\arguments{ +\item{m}{An object of type mash} + +\item{factor}{a number between 0 and 1. The factor within which effects are +considered to be shared.} + +\item{thresh}{Numeric. The threshold for including an effect in the assessment} +} +\value{ +A list containing eight data frames. Those with names that start +"S_" contain significant effects of different types between pairs of +named rows and columns. S_all_pairwise contains all significant effects; +NS_pairwise contains all non-significant effects. S_CN contains effects +significant in only one condition, and effects with a significantly +different magnitude (differential sensitivity). This dataframe is not +conservative using the local false sign rate test - we can't determine +the sign of one of the effects for effects significant in only one +condition - so it's not recommended to use this, but included. S_2_no +contains effects significant in both conditions that do not differ +significantly in magnitude. These effects do not have GxE. S_AP contains +effects significant in both conditions that differ in their sign - and +have antagonistic pleiotropy. S_DS contains effects significant in both +conditions that differ in the magnitude of their effect, but not their +sign - differentially sensitive alleles. S_1_row and S_1_col contain +effects that are significant in just one of the two conditions - the row +or the column, respectively. +} +\description{ +Performs set operations to determine pairwise GxE for effects +from a mash object. +} diff --git a/man/get_U_by_mass.Rd b/man/get_U_by_mass.Rd new file mode 100644 index 0000000..ef20436 --- /dev/null +++ b/man/get_U_by_mass.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mash-evaluation.R +\name{get_U_by_mass} +\alias{get_U_by_mass} +\title{Get the positions of objects in a mash object Ulist that are above +some mass threshold.} +\usage{ +get_U_by_mass(m, thresh = 0.05) +} +\arguments{ +\item{m}{An object of type mash} + +\item{thresh}{Numeric. The mass threshold for including a covariance matrix} +} +\description{ +Get the positions of objects in a mash object Ulist that are +above some mass threshold. +} diff --git a/man/get_best_PC_df.Rd b/man/get_best_PC_df.Rd index a0623fd..de0eb3c 100644 --- a/man/get_best_PC_df.Rd +++ b/man/get_best_PC_df.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/wrapper.R \name{get_best_PC_df} \alias{get_best_PC_df} -\title{Return best number of PCs in terms of lambda_GC for Panicum virgatum. -Return best number of PCs in terms of lambda_GC for the CDBN.} +\title{Return best number of PCs in terms of lambda_GC} \usage{ get_best_PC_df(df) } @@ -16,7 +15,7 @@ A dataframe containing the best lambda_GC value and number of PCs for each phenotype in the data frame. } \description{ -Given a dataframe created using pvdiv_lambda_GC, this function +Given a dataframe created using div_lambda_GC, this function returns the first lambda_GC less than 1.05, or the smallest lambda_GC, for each column in the dataframe. } diff --git a/man/get_colnames.Rd b/man/get_colnames.Rd new file mode 100644 index 0000000..59441f2 --- /dev/null +++ b/man/get_colnames.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mash-evaluation.R +\name{get_colnames} +\alias{get_colnames} +\title{Get column names from a mash object} +\usage{ +get_colnames(m) +} +\arguments{ +\item{m}{An object of type mash} +} +\value{ +A vector of phenotype names +} +\description{ +This function extracts the column names from the local false +sign rate table of a mash object's results. This can tell you the condition +names or phenotype names used in the mash object. That can be useful for +looking at a subset of these columns, say. +} +\examples{ + \dontrun{get_colnames(m = mash_obj)} + +} diff --git a/man/get_date_filename.Rd b/man/get_date_filename.Rd new file mode 100644 index 0000000..07206bc --- /dev/null +++ b/man/get_date_filename.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mash-evaluation.R +\name{get_date_filename} +\alias{get_date_filename} +\title{Get current date-time in a filename-appropriate format.} +\usage{ +get_date_filename() +} +\value{ +A string containing the current date-time with spaces and colons +replaced with underscores and periods, respectively. +} +\description{ +Converts the current \code{Sys.time()} system time to a format +that is acceptable to include in a filename. Changes punctuation that +won't work in a filename. +} diff --git a/man/get_estimated_pi.Rd b/man/get_estimated_pi.Rd new file mode 100644 index 0000000..f0426b1 --- /dev/null +++ b/man/get_estimated_pi.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mash-evaluation.R +\name{get_estimated_pi} +\alias{get_estimated_pi} +\title{Return the estimated mixture proportions. Use get_estimated_pi to +extract the estimates of the mixture proportions for different types of +covariance matrix. This tells you which covariance matrices have most of +the mass.} +\usage{ +get_estimated_pi(m, dimension = c("cov", "grid", "all")) +} +\arguments{ +\item{m}{the mash result} + +\item{dimension}{indicates whether you want the mixture proportions for the +covariances, grid, or all} +} +\value{ +a named vector containing the estimated mixture proportions. +} +\description{ +Return the estimated mixture proportions. Use get_estimated_pi to +extract the estimates of the mixture proportions for different types of +covariance matrix. This tells you which covariance matrices have most of +the mass. +} +\details{ +If the fit was done with \code{usepointmass=TRUE} then the first +element of the returned vector will correspond to the null, and the +remaining elements to the non-null covariance matrices. Suppose the fit +was done with $K$ covariances and a grid of length $L$. If +\code{dimension=cov} then the returned vector will be of length $K$ +(or $K+1$ if \code{usepointmass=TRUE}). If \code{dimension=grid} then +the returned vector will be of length $L$ (or $L+1$). If +\code{dimension=all} then the returned vector will be of length $LK$ (or +$LK+1$). The names of the vector will be informative for which +combination each element corresponds to. +} diff --git a/man/get_log10bf.Rd b/man/get_log10bf.Rd new file mode 100644 index 0000000..e060c9c --- /dev/null +++ b/man/get_log10bf.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mash-evaluation.R +\name{get_log10bf} +\alias{get_log10bf} +\title{Return the Bayes Factor for each effect} +\usage{ +get_log10bf(m) +} +\arguments{ +\item{m}{the mash result (from joint or 1by1 analysis); must have been +computed using usepointmass = TRUE} +} +\value{ +if m was fitted using usepointmass=TRUE then returns a vector of +the log10(bf) values for each effect. That is, the jth element +lbf_j is log10(Pr(Bj | g = ghat-nonnull)/Pr(Bj | g = 0)) where gha +t-nonnull is the non-null part of ghat. Otherwise returns NULL. +} +\description{ +Return the Bayes Factor for each effect +} diff --git a/man/get_marker_df.Rd b/man/get_marker_df.Rd new file mode 100644 index 0000000..f72e7b2 --- /dev/null +++ b/man/get_marker_df.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mash-evaluation.R +\name{get_marker_df} +\alias{get_marker_df} +\title{Get mash marker_df} +\usage{ +get_marker_df(m, snp) +} +\arguments{ +\item{m}{An object of type mash} + +\item{snp}{A bigSNP object, produced by the bigsnpr package. Here, the WAMI +SNP information.} +} +\description{ +Pulls SNP markers information in the mash object from a bigsnp +object. +} diff --git a/man/get_n_significant_conditions.Rd b/man/get_n_significant_conditions.Rd new file mode 100644 index 0000000..10cc5a0 --- /dev/null +++ b/man/get_n_significant_conditions.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mash-evaluation.R +\name{get_n_significant_conditions} +\alias{get_n_significant_conditions} +\title{Count number of conditions each effect is significant in} +\usage{ +get_n_significant_conditions( + m, + thresh = 0.05, + conditions = NULL, + sig_fn = get_lfsr +) +} +\arguments{ +\item{m}{the mash result (from joint or 1by1 analysis)} + +\item{thresh}{indicates the threshold below which to call signals significant} + +\item{conditions}{which conditions to include in check (default to all)} + +\item{sig_fn}{the significance function used to extract significance from mash object; eg could be ashr::get_lfsr or ashr::get_lfdr} +} +\value{ +a vector containing the number of significant conditions +} +\description{ +Count number of conditions each effect is significant in +} diff --git a/man/get_ncond.Rd b/man/get_ncond.Rd new file mode 100644 index 0000000..36f60b4 --- /dev/null +++ b/man/get_ncond.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mash-evaluation.R +\name{get_ncond} +\alias{get_ncond} +\title{Get number of conditions} +\usage{ +get_ncond(m) +} +\arguments{ +\item{m}{The mash result} +} +\description{ +Get number of conditions +} diff --git a/man/get_pairwise_sharing.Rd b/man/get_pairwise_sharing.Rd new file mode 100644 index 0000000..254800f --- /dev/null +++ b/man/get_pairwise_sharing.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mash-evaluation.R +\name{get_pairwise_sharing} +\alias{get_pairwise_sharing} +\title{Compute the proportion of (significant) signals shared by magnitude in each pair of conditions, based on the poterior mean} +\usage{ +get_pairwise_sharing(m, factor = 0.5, lfsr_thresh = 0.05, FUN = identity) +} +\arguments{ +\item{m}{the mash fit} + +\item{factor}{a number between 0 and 1 - the factor within which effects are +considered to be shared.} + +\item{lfsr_thresh}{the lfsr threshold for including an effect in the +assessment} + +\item{FUN}{a function to be applied to the estimated effect sizes before +assessing sharing. The most obvious choice beside the default +'FUN=identity' would be 'FUN=abs' if you want to ignore the sign of the +effects when assesing sharing.} +} +\description{ +Compute the proportion of (significant) signals shared by magnitude in each pair of conditions, based on the poterior mean +} +\details{ +For each pair of tissues, first identify the effects that are +significant (by lfsr