Error checks and plotting speedups for dive_phe2mash function
This commit is contained in:
@@ -15,10 +15,13 @@ Roxygen: list(markdown = TRUE)
|
|||||||
RoxygenNote: 7.1.1
|
RoxygenNote: 7.1.1
|
||||||
Imports:
|
Imports:
|
||||||
ashr,
|
ashr,
|
||||||
|
bigassertr,
|
||||||
bigsnpr,
|
bigsnpr,
|
||||||
bigstatsr,
|
bigstatsr,
|
||||||
|
cluster,
|
||||||
cowplot,
|
cowplot,
|
||||||
dplyr,
|
dplyr,
|
||||||
|
GGally,
|
||||||
ggplot2,
|
ggplot2,
|
||||||
mashr,
|
mashr,
|
||||||
magrittr,
|
magrittr,
|
||||||
@@ -26,6 +29,8 @@ Imports:
|
|||||||
purrr,
|
purrr,
|
||||||
readr,
|
readr,
|
||||||
rlang (>= 0.1.2),
|
rlang (>= 0.1.2),
|
||||||
|
rlist,
|
||||||
|
stringr,
|
||||||
tibble,
|
tibble,
|
||||||
tidyr,
|
tidyr,
|
||||||
tidyselect
|
tidyselect
|
||||||
|
|||||||
25
NAMESPACE
25
NAMESPACE
@@ -11,25 +11,44 @@ export(dive_phe2mash)
|
|||||||
export(enquo)
|
export(enquo)
|
||||||
export(enquos)
|
export(enquos)
|
||||||
export(expr)
|
export(expr)
|
||||||
|
export(get_GxE)
|
||||||
|
export(get_U_by_mass)
|
||||||
export(get_lambdagc)
|
export(get_lambdagc)
|
||||||
|
export(get_pairwise_sharing)
|
||||||
export(get_qqplot)
|
export(get_qqplot)
|
||||||
|
export(get_significant_results)
|
||||||
|
export(mash_plot_Ulist)
|
||||||
|
export(mash_plot_covar)
|
||||||
|
export(mash_plot_manhattan_by_condition)
|
||||||
|
export(mash_plot_marker_effect)
|
||||||
|
export(mash_plot_pairwise_sharing)
|
||||||
|
export(mash_plot_sig_by_condition)
|
||||||
export(sym)
|
export(sym)
|
||||||
export(syms)
|
export(syms)
|
||||||
import(bigsnpr)
|
import(bigsnpr)
|
||||||
import(bigstatsr)
|
import(bigstatsr)
|
||||||
import(ggplot2)
|
import(ggplot2)
|
||||||
import(mashr)
|
import(mashr)
|
||||||
|
importFrom(GGally,ggcorr)
|
||||||
importFrom(ashr,get_fitted_g)
|
importFrom(ashr,get_fitted_g)
|
||||||
|
importFrom(ashr,get_lfsr)
|
||||||
|
importFrom(ashr,get_pm)
|
||||||
|
importFrom(ashr,get_psd)
|
||||||
|
importFrom(bigassertr,printf)
|
||||||
importFrom(bigsnpr,snp_autoSVD)
|
importFrom(bigsnpr,snp_autoSVD)
|
||||||
|
importFrom(cluster,daisy)
|
||||||
importFrom(cowplot,save_plot)
|
importFrom(cowplot,save_plot)
|
||||||
importFrom(dplyr,arrange)
|
importFrom(dplyr,arrange)
|
||||||
|
importFrom(dplyr,between)
|
||||||
importFrom(dplyr,case_when)
|
importFrom(dplyr,case_when)
|
||||||
|
importFrom(dplyr,desc)
|
||||||
importFrom(dplyr,filter)
|
importFrom(dplyr,filter)
|
||||||
importFrom(dplyr,full_join)
|
importFrom(dplyr,full_join)
|
||||||
importFrom(dplyr,group_by)
|
importFrom(dplyr,group_by)
|
||||||
importFrom(dplyr,left_join)
|
importFrom(dplyr,left_join)
|
||||||
importFrom(dplyr,mutate)
|
importFrom(dplyr,mutate)
|
||||||
importFrom(dplyr,mutate_if)
|
importFrom(dplyr,mutate_if)
|
||||||
|
importFrom(dplyr,n)
|
||||||
importFrom(dplyr,rename)
|
importFrom(dplyr,rename)
|
||||||
importFrom(dplyr,select)
|
importFrom(dplyr,select)
|
||||||
importFrom(dplyr,slice)
|
importFrom(dplyr,slice)
|
||||||
@@ -52,11 +71,15 @@ importFrom(rlang,enquos)
|
|||||||
importFrom(rlang,expr)
|
importFrom(rlang,expr)
|
||||||
importFrom(rlang,sym)
|
importFrom(rlang,sym)
|
||||||
importFrom(rlang,syms)
|
importFrom(rlang,syms)
|
||||||
|
importFrom(rlist,list.append)
|
||||||
|
importFrom(stats,hclust)
|
||||||
importFrom(stats,median)
|
importFrom(stats,median)
|
||||||
importFrom(stats,ppoints)
|
importFrom(stats,ppoints)
|
||||||
importFrom(stats,predict)
|
importFrom(stats,predict)
|
||||||
importFrom(stats,qbeta)
|
importFrom(stats,qbeta)
|
||||||
importFrom(stats,uniroot)
|
importFrom(stats,uniroot)
|
||||||
|
importFrom(stringr,str_replace)
|
||||||
|
importFrom(stringr,str_replace_all)
|
||||||
importFrom(tibble,add_column)
|
importFrom(tibble,add_column)
|
||||||
importFrom(tibble,add_row)
|
importFrom(tibble,add_row)
|
||||||
importFrom(tibble,as_tibble)
|
importFrom(tibble,as_tibble)
|
||||||
@@ -64,6 +87,8 @@ importFrom(tibble,enframe)
|
|||||||
importFrom(tibble,rownames_to_column)
|
importFrom(tibble,rownames_to_column)
|
||||||
importFrom(tibble,tibble)
|
importFrom(tibble,tibble)
|
||||||
importFrom(tidyr,gather)
|
importFrom(tidyr,gather)
|
||||||
|
importFrom(tidyr,pivot_longer)
|
||||||
importFrom(tidyr,replace_na)
|
importFrom(tidyr,replace_na)
|
||||||
|
importFrom(tidyr,separate)
|
||||||
importFrom(tidyselect,all_of)
|
importFrom(tidyselect,all_of)
|
||||||
importFrom(utils,tail)
|
importFrom(utils,tail)
|
||||||
|
|||||||
187
R/wrapper.R
187
R/wrapper.R
@@ -42,6 +42,7 @@
|
|||||||
#' @param U.hyp Other covariance matrices for mash. Specify these as a list. These
|
#' @param U.hyp Other covariance matrices for mash. Specify these as a list. These
|
||||||
#' matrices must have dimensions that match the number of phenotypes where
|
#' matrices must have dimensions that match the number of phenotypes where
|
||||||
#' univariate GWAS ran successfully.
|
#' univariate GWAS ran successfully.
|
||||||
|
#' @param verbose Output some information on the iterations? Default is `TRUE`.
|
||||||
#'
|
#'
|
||||||
#' @return A mash object made up of all phenotypes where univariate GWAS ran
|
#' @return A mash object made up of all phenotypes where univariate GWAS ran
|
||||||
#' successfully.
|
#' successfully.
|
||||||
@@ -56,6 +57,7 @@
|
|||||||
#' @importFrom tidyr replace_na
|
#' @importFrom tidyr replace_na
|
||||||
#' @importFrom matrixStats colMaxs rowMaxs
|
#' @importFrom matrixStats colMaxs rowMaxs
|
||||||
#' @importFrom stats predict
|
#' @importFrom stats predict
|
||||||
|
#' @importFrom bigassertr printf
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
|
dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
|
||||||
@@ -64,7 +66,7 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
|
|||||||
thr.m = c("sum", "max"), num.strong = 1000,
|
thr.m = c("sum", "max"), num.strong = 1000,
|
||||||
num.random = NA,
|
num.random = NA,
|
||||||
scale.phe = TRUE, roll.size = 50, U.ed = NA,
|
scale.phe = TRUE, roll.size = 50, U.ed = NA,
|
||||||
U.hyp = NA){
|
U.hyp = NA, verbose = TRUE){
|
||||||
# 1. Stop if not functions. ----
|
# 1. Stop if not functions. ----
|
||||||
if (attr(snp, "class") != "bigSNP") {
|
if (attr(snp, "class") != "bigSNP") {
|
||||||
stop("snp needs to be a bigSNP object, produced by the bigsnpr package.")
|
stop("snp needs to be a bigSNP object, produced by the bigsnpr package.")
|
||||||
@@ -80,6 +82,9 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
|
|||||||
} else {
|
} else {
|
||||||
type <- rep(type, ncol(df) - 1)
|
type <- rep(type, ncol(df) - 1)
|
||||||
}
|
}
|
||||||
|
if (!dir.exists(outputdir)) {
|
||||||
|
dir.create(outputdir)
|
||||||
|
}
|
||||||
|
|
||||||
## 1a. Generate useful values ----
|
## 1a. Generate useful values ----
|
||||||
G <- snp$genotypes
|
G <- snp$genotypes
|
||||||
@@ -94,12 +99,13 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
|
|||||||
bonferroni <- -log10(0.05/length(snp$map$physical.pos))
|
bonferroni <- -log10(0.05/length(snp$map$physical.pos))
|
||||||
markers <- tibble(CHR = snp$map$chromosome, POS = snp$map$physical.pos,
|
markers <- tibble(CHR = snp$map$chromosome, POS = snp$map$physical.pos,
|
||||||
marker.ID = snp$map$marker.ID) %>%
|
marker.ID = snp$map$marker.ID) %>%
|
||||||
mutate(CHRN = as.numeric(as.factor(.data$CHR)))
|
mutate(CHRN = as.numeric(as.factor(.data$CHR)),
|
||||||
|
CHR = as.factor(.data$CHR))
|
||||||
|
|
||||||
# 2. Pop Structure Correction ----
|
# 2. Pop Structure Correction ----
|
||||||
if (is.null(svd)) {
|
if (is.null(svd)) {
|
||||||
message(paste0("Covariance matrix (svd) was not supplied - this will be",
|
printf2(verbose = verbose, "\nCovariance matrix (svd) was not supplied - ")
|
||||||
" generated using snp_autoSVD()."))
|
printf2(verbose = verbose, "\nthis will be generated using snp_autoSVD()")
|
||||||
svd <- snp_autoSVD(G = G, infos.chr = markers$CHRN, infos.pos = markers$POS,
|
svd <- snp_autoSVD(G = G, infos.chr = markers$CHRN, infos.pos = markers$POS,
|
||||||
k = 10, thr.r2 = thr.r2, roll.size = roll.size)
|
k = 10, thr.r2 = thr.r2, roll.size = roll.size)
|
||||||
} else {
|
} else {
|
||||||
@@ -107,6 +113,7 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
|
|||||||
}
|
}
|
||||||
pc_max <- ncol(svd$u)
|
pc_max <- ncol(svd$u)
|
||||||
gwas_ok <- c()
|
gwas_ok <- c()
|
||||||
|
first_gwas_ok <- FALSE
|
||||||
|
|
||||||
for (i in 2:ncol(df)) {
|
for (i in 2:ncol(df)) {
|
||||||
df1 <- df %>%
|
df1 <- df %>%
|
||||||
@@ -127,8 +134,10 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
|
|||||||
gwas_ok[i-1] <- check_gwas(df1 = df1, phename = phename, type = type[i-1],
|
gwas_ok[i-1] <- check_gwas(df1 = df1, phename = phename, type = type[i-1],
|
||||||
nPhe = nPhe, minphe = min.phe, nLev = nLev)
|
nPhe = nPhe, minphe = min.phe, nLev = nLev)
|
||||||
|
|
||||||
|
|
||||||
# Find best # PCs to correct for population structure for each phenotype.
|
# Find best # PCs to correct for population structure for each phenotype.
|
||||||
if(gwas_ok[i-1]){
|
if(gwas_ok[i-1]){
|
||||||
|
|
||||||
lambdagc_df <- div_lambda_GC(df = df1, type = type[i-1], snp = snp,
|
lambdagc_df <- div_lambda_GC(df = df1, type = type[i-1], snp = snp,
|
||||||
svd = svd, npcs = c(0:pc_max))
|
svd = svd, npcs = c(0:pc_max))
|
||||||
PC_df <- get_best_PC_df(lambdagc_df)
|
PC_df <- get_best_PC_df(lambdagc_df)
|
||||||
@@ -145,29 +154,22 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
|
|||||||
nsnp = nSNP, npcs = PC_df$NumPCs, nphe = nPhe,
|
nsnp = nSNP, npcs = PC_df$NumPCs, nphe = nPhe,
|
||||||
nlev = nLev, lambda_GC = PC_df$lambda_GC,
|
nlev = nLev, lambda_GC = PC_df$lambda_GC,
|
||||||
bonferroni = bonferroni)
|
bonferroni = bonferroni)
|
||||||
# plot Manhattan and QQ if save.plots == TRUE
|
|
||||||
if(save.plots == TRUE){
|
# plot QQ if save.plots == TRUE
|
||||||
|
if (save.plots == TRUE) {
|
||||||
qqplot <- get_qqplot(ps = gwas$pvalue, lambdaGC = TRUE)
|
qqplot <- get_qqplot(ps = gwas$pvalue, lambdaGC = TRUE)
|
||||||
manhattan <- get_manhattan(log10p = gwas$log10p, snp = snp,
|
}
|
||||||
thresh = bonferroni) # could round these too
|
|
||||||
plotname <- paste0(gwas_data$phe, "_", gwas_data$type, "_model_",
|
|
||||||
gwas_data$nphe, "g_", gwas_data$nsnp, "_SNPs_",
|
|
||||||
gwas_data$npcs, "_PCs.png")
|
|
||||||
save_plot(filename = file.path(outputdir, paste0("QQplot_", plotname)),
|
|
||||||
plot = qqplot, base_asp = 1, base_height = 4)
|
|
||||||
save_plot(filename = file.path(outputdir, paste0("Manhattan_", plotname)),
|
|
||||||
plot = manhattan, base_asp = 2.1, base_height = 3.5)
|
|
||||||
}
|
|
||||||
|
|
||||||
# save gwas outputs together in a fbm
|
# save gwas outputs together in a fbm
|
||||||
gwas <- gwas %>%
|
gwas <- gwas %>%
|
||||||
select(.data[["estim"]], .data[["std.err"]], .data[["log10p"]])
|
select(.data[["estim"]], .data[["std.err"]], .data[["log10p"]])
|
||||||
|
|
||||||
if(i == 2){ # save .bk and .rds file the first time through the loop.
|
if(!first_gwas_ok){ # save .bk and .rds file the first time through the loop.
|
||||||
if (!grepl("_$", suffix) & suffix != ""){
|
if (!grepl("_$", suffix) & suffix != ""){
|
||||||
suffix <- paste0("_", suffix)
|
suffix <- paste0("_", suffix)
|
||||||
}
|
}
|
||||||
fbm.name <- paste0(outputdir, "gwas_effects", suffix)
|
first_gwas_ok <- TRUE
|
||||||
|
fbm.name <- file.path(outputdir, paste0("gwas_effects", suffix))
|
||||||
|
|
||||||
colnames_fbm <- c(paste0(phename, "_Effect"), paste0(phename, "_SE"),
|
colnames_fbm <- c(paste0(phename, "_Effect"), paste0(phename, "_SE"),
|
||||||
paste0(phename, "_log10p"))
|
paste0(phename, "_log10p"))
|
||||||
@@ -179,18 +181,43 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
|
|||||||
colnames_fbm <- c(colnames_fbm, paste0(phename, "_Effect"),
|
colnames_fbm <- c(colnames_fbm, paste0(phename, "_Effect"),
|
||||||
paste0(phename, "_SE"), paste0(phename, "_log10p"))
|
paste0(phename, "_SE"), paste0(phename, "_log10p"))
|
||||||
gwas2$add_columns(ncol_add = 3)
|
gwas2$add_columns(ncol_add = 3)
|
||||||
gwas2[,c(i*3-5, i*3-4, i*3-3)] <- gwas
|
gwas2[, c(sum(gwas_ok)*3 - 2, sum(gwas_ok)*3 - 1,
|
||||||
|
sum(gwas_ok)*3)] <- gwas
|
||||||
gwas2$save()
|
gwas2$save()
|
||||||
gwas_metadata <- add_row(gwas_metadata, phe = phename, type = type[i-1],
|
gwas_metadata <- add_row(gwas_metadata, phe = phename, type = type[i - 1],
|
||||||
nsnp = nSNP, npcs = PC_df$NumPCs, nphe = nPhe,
|
nsnp = nSNP, npcs = PC_df$NumPCs, nphe = nPhe,
|
||||||
nlev = nLev, lambda_GC = PC_df$lambda_GC,
|
nlev = nLev, lambda_GC = PC_df$lambda_GC,
|
||||||
bonferroni = bonferroni)
|
bonferroni = bonferroni)
|
||||||
}
|
}
|
||||||
|
# plot Manhattan and QQ if save.plots == TRUE
|
||||||
|
if (save.plots == TRUE) {
|
||||||
|
# set aspect ratio based on number of SNPs in snp file
|
||||||
|
asp <- log10(snp$genotypes$ncol)/2
|
||||||
|
if(asp < 1.1){
|
||||||
|
asp <- 1.1
|
||||||
|
}
|
||||||
|
|
||||||
|
manhattan <- get_manhattan(X = gwas2, ind = sum(gwas_ok)*3, snp = snp,
|
||||||
|
thresh = bonferroni)
|
||||||
|
plotname <- paste0(gwas_data$phe, "_", gwas_data$type, "_model_",
|
||||||
|
gwas_data$nphe, "g_", gwas_data$nsnp, "_SNPs_",
|
||||||
|
gwas_data$npcs, "_PCs.png")
|
||||||
|
save_plot(filename = file.path(outputdir, paste0("QQplot_", plotname)),
|
||||||
|
plot = qqplot, base_asp = 1, base_height = 4)
|
||||||
|
save_plot(filename = file.path(outputdir, paste0("Manhattan_", plotname)),
|
||||||
|
plot = manhattan, base_asp = asp, base_height = 3.75)
|
||||||
|
|
||||||
|
}
|
||||||
rm(gwas)
|
rm(gwas)
|
||||||
message(paste0("Finished phenotype ", i-1, ": ", names(df)[i]))
|
printf2(verbose = verbose, "\nFinished GWAS on phenotype %s. ",
|
||||||
|
names(df)[i])
|
||||||
|
} else {
|
||||||
|
printf2(verbose = verbose, "\nSkipping GWAS on phenotype %s. ",
|
||||||
|
names(df)[i])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
printf2(verbose = verbose, "\nNow preparing gwas effects for use in mash.\n")
|
||||||
# 4. mash input ----
|
# 4. mash input ----
|
||||||
## prioritize effects with max(log10p) or max(sum(log10p))
|
## prioritize effects with max(log10p) or max(sum(log10p))
|
||||||
## make a random set of relatively unlinked SNPs
|
## make a random set of relatively unlinked SNPs
|
||||||
@@ -198,7 +225,7 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
|
|||||||
ind_se <- (1:sum(gwas_ok))*3 - 1
|
ind_se <- (1:sum(gwas_ok))*3 - 1
|
||||||
ind_p <- (1:sum(gwas_ok))*3
|
ind_p <- (1:sum(gwas_ok))*3
|
||||||
|
|
||||||
if(thr.m == "sum"){
|
if (thr.m == "sum") {
|
||||||
thr_log10p <- big_apply(gwas2,
|
thr_log10p <- big_apply(gwas2,
|
||||||
a.FUN = function(X, ind) rowSums(X[, ind]),
|
a.FUN = function(X, ind) rowSums(X[, ind]),
|
||||||
ind = ind_p,
|
ind = ind_p,
|
||||||
@@ -211,21 +238,21 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
|
|||||||
}
|
}
|
||||||
gwas2$add_columns(ncol_add = 1)
|
gwas2$add_columns(ncol_add = 1)
|
||||||
colnames_fbm <- c(colnames_fbm, paste0(thr.m, "_thr_log10p"))
|
colnames_fbm <- c(colnames_fbm, paste0(thr.m, "_thr_log10p"))
|
||||||
gwas2[,(sum(gwas_ok)*3+1)] <- thr_log10p
|
gwas2[,(sum(gwas_ok)*3 + 1)] <- thr_log10p
|
||||||
gwas2$save()
|
gwas2$save()
|
||||||
|
|
||||||
## replace NA or Nan values
|
## replace NA or Nan values
|
||||||
# Replace SE with 1's, estimates and p values with 0's.
|
# Replace SE with 1's, estimates and p values with 0's.
|
||||||
replace_na_1 <- function(X, ind) replace_na(X[, ind], 1)
|
replace_na_1 <- function(X, ind) replace_na(X[, ind], 1)
|
||||||
replace_na_0 <- function(X, ind) replace_na(X[, ind], 0)
|
replace_na_0 <- function(X, ind) replace_na(X[, ind], 0)
|
||||||
gwas2[,ind_se] <- big_apply(gwas2, a.FUN = replace_na_1, ind = ind_se,
|
gwas2[, ind_se] <- big_apply(gwas2, a.FUN = replace_na_1, ind = ind_se,
|
||||||
a.combine = 'plus')
|
a.combine = 'plus')
|
||||||
gwas2[,ind_estim] <- big_apply(gwas2, a.FUN = replace_na_0, ind = ind_estim,
|
gwas2[, ind_estim] <- big_apply(gwas2, a.FUN = replace_na_0, ind = ind_estim,
|
||||||
a.combine = 'plus')
|
a.combine = 'plus')
|
||||||
gwas2[,ind_p] <- big_apply(gwas2, a.FUN = replace_na_0, ind = ind_p,
|
gwas2[, ind_p] <- big_apply(gwas2, a.FUN = replace_na_0, ind = ind_p,
|
||||||
a.combine = 'plus')
|
a.combine = 'plus')
|
||||||
gwas2[,(sum(gwas_ok)*3+1)] <- big_apply(gwas2, a.FUN = replace_na_0,
|
gwas2[, (sum(gwas_ok)*3+1)] <- big_apply(gwas2, a.FUN = replace_na_0,
|
||||||
ind = (sum(gwas_ok)*3+1),
|
ind = (sum(gwas_ok)*3 + 1),
|
||||||
a.combine = 'plus')
|
a.combine = 'plus')
|
||||||
gwas2$save()
|
gwas2$save()
|
||||||
|
|
||||||
@@ -301,31 +328,26 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
|
|||||||
# 5. mash ----
|
# 5. mash ----
|
||||||
|
|
||||||
data_r <- mashr::mash_set_data(Bhat_random, Shat_random)
|
data_r <- mashr::mash_set_data(Bhat_random, Shat_random)
|
||||||
message(paste0("Estimating the correlation structure in the null tests from ",
|
printf2(verbose = verbose, "\nEstimating correlation structure in the null tests from a random sample of clumped data.")
|
||||||
"the random data.
|
|
||||||
(not the strong data because it will not necessarily contain
|
|
||||||
any null tests)."))
|
|
||||||
Vhat <- mashr::estimate_null_correlation_simple(data = data_r)
|
Vhat <- mashr::estimate_null_correlation_simple(data = data_r)
|
||||||
|
|
||||||
message(paste0("Setting up the main data objects with this correlation ",
|
|
||||||
"structure in place."))
|
|
||||||
data_strong <- mashr::mash_set_data(Bhat_strong, Shat_strong, V = Vhat)
|
data_strong <- mashr::mash_set_data(Bhat_strong, Shat_strong, V = Vhat)
|
||||||
data_random <- mashr::mash_set_data(Bhat_random, Shat_random, V = Vhat)
|
data_random <- mashr::mash_set_data(Bhat_random, Shat_random, V = Vhat)
|
||||||
data_full <- mashr::mash_set_data(Bhat_full, Shat_full, V = Vhat)
|
data_full <- mashr::mash_set_data(Bhat_full, Shat_full, V = Vhat)
|
||||||
U_c <- mashr::cov_canonical(data_random)
|
U_c <- mashr::cov_canonical(data_random)
|
||||||
|
|
||||||
if (!is.na(U.ed[1])) {
|
if (is.na(U.ed[1])) {
|
||||||
message(paste0("Now estimating data-driven covariances using the strong",
|
printf2(verbose = verbose, "\nNow estimating data-driven covariances using
|
||||||
" tests.
|
the strong tests. NB: This step may take some time to complete.\n")
|
||||||
NB: This step may take some time to complete."))
|
if (length(ind_p) < 6) {
|
||||||
if (length(ind_p) < 6) {
|
cov_npc <- ind_p - 1
|
||||||
cov_npc <- ind_p - 1
|
} else {
|
||||||
} else {
|
cov_npc <- 5
|
||||||
cov_npc <- 5
|
}
|
||||||
}
|
U_pca = mashr::cov_pca(data_strong, npc = cov_npc)
|
||||||
U_pca = mashr::cov_pca(data_strong, npc = cov_npc)
|
U_ed = mashr::cov_ed(data_strong, U_pca)
|
||||||
U_ed = mashr::cov_ed(data_strong, U_pca)
|
saveRDS(U_ed, file = file.path(outputdir, paste0("Mash_U_ed", suffix,
|
||||||
saveRDS(U_ed, file = paste0(outputdir, "Mash_U_ed", suffix, ".rds"))
|
".rds")))
|
||||||
} else if (typeof(U.ed) == "list") {
|
} else if (typeof(U.ed) == "list") {
|
||||||
U_ed <- U.ed
|
U_ed <- U.ed
|
||||||
} else if (typeof(U.ed) == "character") {
|
} else if (typeof(U.ed) == "character") {
|
||||||
@@ -337,12 +359,15 @@ dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "",
|
|||||||
|
|
||||||
if (typeof(U.hyp) == "list") {
|
if (typeof(U.hyp) == "list") {
|
||||||
m = mashr::mash(data_random, Ulist = c(U_ed, U_c, U.hyp), outputlevel = 1)
|
m = mashr::mash(data_random, Ulist = c(U_ed, U_c, U.hyp), outputlevel = 1)
|
||||||
} else {
|
} else if (typeof(U.hyp) == "character") {
|
||||||
|
U_hyp <- readRDS(file = U.hyp)
|
||||||
|
m = mashr::mash(data_random, Ulist = c(U_ed, U_c, U_hyp), outputlevel = 1)
|
||||||
|
} else {
|
||||||
m = mashr::mash(data_random, Ulist = c(U_ed, U_c), outputlevel = 1)
|
m = mashr::mash(data_random, Ulist = c(U_ed, U_c), outputlevel = 1)
|
||||||
|
printf2(verbose = verbose, "\nNo user-specified covariance matrices were included in the mash fit.")
|
||||||
}
|
}
|
||||||
message(paste0("Compute posterior matrices for all effects",
|
printf2(verbose = verbose, "\nComputing posterior weights for all effects
|
||||||
" using the mash fit from the
|
using the mash fit from the random tests.")
|
||||||
random tests."))
|
|
||||||
m2 = mashr::mash(data_full, g = ashr::get_fitted_g(m), fixg = TRUE)
|
m2 = mashr::mash(data_full, g = ashr::get_fitted_g(m), fixg = TRUE)
|
||||||
|
|
||||||
return(m2)
|
return(m2)
|
||||||
@@ -429,6 +454,9 @@ div_gwas <- function(df, snp, type, svd, npcs){
|
|||||||
return(gwaspc)
|
return(gwaspc)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#' Print a formatted message only when `verbose` is TRUE.
|
||||||
|
#' @importFrom bigassertr printf
|
||||||
|
printf2 <- function(verbose, ...) if (verbose) { printf(...) }
|
||||||
|
|
||||||
#' Create a quantile-quantile plot with ggplot2.
|
#' Create a quantile-quantile plot with ggplot2.
|
||||||
#'
|
#'
|
||||||
@@ -544,17 +572,27 @@ round_xy <- function(x, y, cl = NA, cu = NA, roundby = 0.001){
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
get_manhattan <- function(log10p, snp, thresh){
|
get_manhattan <- function(X, ind, snp, thresh){
|
||||||
|
roundFBM <- function(X, ind, at) ceiling(X[, ind] / at) * at
|
||||||
|
observed <- big_apply(X, ind = ind, a.FUN = roundFBM, at = 0.01,
|
||||||
|
a.combine = 'plus')
|
||||||
|
|
||||||
plot_data <- tibble(CHR = snp$map$chromosome, POS = snp$map$physical.pos,
|
plot_data <- tibble(CHR = snp$map$chromosome, POS = snp$map$physical.pos,
|
||||||
marker.ID = snp$map$marker.ID, log10p = log10p) %>%
|
marker.ID = snp$map$marker.ID, observed = observed)
|
||||||
mutate(observed = round2(.data$log10p, at = 0.001)) %>%
|
|
||||||
|
if (length(unique(snp$map$physical.pos)) >= 500000) {
|
||||||
|
plot_data <- plot_data %>%
|
||||||
|
mutate(POS = round2(.data$POS, at = 250000))
|
||||||
|
}
|
||||||
|
plot_data <- plot_data %>%
|
||||||
group_by(.data$CHR, .data$POS, .data$observed) %>%
|
group_by(.data$CHR, .data$POS, .data$observed) %>%
|
||||||
slice(1)
|
slice(1) %>%
|
||||||
|
mutate(CHR = as.factor(.data$CHR))
|
||||||
|
|
||||||
nchr <- length(unique(plot_data$CHR))
|
nchr <- length(unique(plot_data$CHR))
|
||||||
|
|
||||||
p1 <- plot_data %>%
|
p1 <- plot_data %>%
|
||||||
ggplot(aes(x = .data$POS, y = .data$log10p)) +
|
ggplot(aes(x = .data$POS, y = .data$observed)) +
|
||||||
geom_point(aes(color = .data$CHR, fill = .data$CHR)) +
|
geom_point(aes(color = .data$CHR, fill = .data$CHR)) +
|
||||||
geom_hline(yintercept = thresh, color = "black", linetype = 2,
|
geom_hline(yintercept = thresh, color = "black", linetype = 2,
|
||||||
size = 1) +
|
size = 1) +
|
||||||
@@ -583,7 +621,7 @@ get_manhattan <- function(log10p, snp, thresh){
|
|||||||
strip.text = element_text(hjust = 0.5, size = 10 ,vjust = 0),
|
strip.text = element_text(hjust = 0.5, size = 10 ,vjust = 0),
|
||||||
strip.placement = 'outside', panel.spacing.x = unit(-0.1, 'cm')) +
|
strip.placement = 'outside', panel.spacing.x = unit(-0.1, 'cm')) +
|
||||||
labs(x = "Chromosome", y = "-log10(p value)") +
|
labs(x = "Chromosome", y = "-log10(p value)") +
|
||||||
scale_x_continuous(expand = c(0.2, 0.2))
|
scale_x_continuous(expand = c(0.15, 0.15))
|
||||||
return(p1)
|
return(p1)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -737,10 +775,9 @@ get_lambdagc <- function(ps, tol = 1e-8){
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#' Return best number of PCs in terms of lambda_GC for Panicum virgatum.
|
#' Return best number of PCs in terms of lambda_GC
|
||||||
#' Return best number of PCs in terms of lambda_GC for the CDBN.
|
|
||||||
#'
|
#'
|
||||||
#' @description Given a dataframe created using pvdiv_lambda_GC, this function
|
#' @description Given a dataframe created using div_lambda_GC, this function
|
||||||
#' returns the first lambda_GC less than 1.05, or the smallest lambda_GC,
|
#' returns the first lambda_GC less than 1.05, or the smallest lambda_GC,
|
||||||
#' for each column in the dataframe.
|
#' for each column in the dataframe.
|
||||||
#'
|
#'
|
||||||
@@ -812,3 +849,35 @@ check_gwas <- function(df1, phename, type, nPhe, minphe, nLev){
|
|||||||
}
|
}
|
||||||
return(gwas_ok)
|
return(gwas_ok)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
## @title Basic sanity check for covariance matrices
|
||||||
|
## @param x input matrix
|
||||||
|
check_covmat_basics = function(x) {
|
||||||
|
label = substitute(x)
|
||||||
|
if (!is.matrix(x))
|
||||||
|
labelled_stop(label, "is not a matrix")
|
||||||
|
if (!is.numeric(x))
|
||||||
|
labelled_stop(label, "is not a numeric matrix")
|
||||||
|
if (any(is.na(x)))
|
||||||
|
labelled_stop(label, "cannot contain NA values")
|
||||||
|
if (any(is.infinite(x)))
|
||||||
|
labelled_stop(label, "cannot contain Inf values")
|
||||||
|
if (any(is.nan(x)))
|
||||||
|
labelled_stop(label, "cannot contain NaN values")
|
||||||
|
if (nrow(x) != ncol(x))
|
||||||
|
labelled_stop(label, "is not a square matrix")
|
||||||
|
if (!isSymmetric(x, check.attributes = FALSE))
|
||||||
|
labelled_stop(label, "is not a symmetric matrix")
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
|
||||||
|
## @title Check matrix for positive definiteness
|
||||||
|
## @param x input matrix
|
||||||
|
check_positive_definite = function(x) {
|
||||||
|
check_covmat_basics(x)
|
||||||
|
tryCatch(chol(x),
|
||||||
|
error = function(e) labelled_stop(substitute(x),
|
||||||
|
"must be positive definite"))
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
|||||||
@@ -20,7 +20,8 @@ dive_phe2mash(
|
|||||||
scale.phe = TRUE,
|
scale.phe = TRUE,
|
||||||
roll.size = 50,
|
roll.size = 50,
|
||||||
U.ed = NA,
|
U.ed = NA,
|
||||||
U.hyp = NA
|
U.hyp = NA,
|
||||||
|
verbose = TRUE
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
@@ -73,6 +74,8 @@ generating these once and reusing them for multiple mash runs can save time.}
|
|||||||
\item{U.hyp}{Other covariance matrices for mash. Specify these as a list. These
|
\item{U.hyp}{Other covariance matrices for mash. Specify these as a list. These
|
||||||
matrices must have dimensions that match the number of phenotypes where
|
matrices must have dimensions that match the number of phenotypes where
|
||||||
univariate GWAS ran successfully.}
|
univariate GWAS ran successfully.}
|
||||||
|
|
||||||
|
\item{verbose}{Output some information on the iterations? Default is \code{TRUE}.}
|
||||||
}
|
}
|
||||||
\value{
|
\value{
|
||||||
A mash object made up of all phenotypes where univariate GWAS ran
|
A mash object made up of all phenotypes where univariate GWAS ran
|
||||||
|
|||||||
29
man/expand_cov.Rd
Normal file
29
man/expand_cov.Rd
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/mash-evaluation.R
|
||||||
|
\name{expand_cov}
|
||||||
|
\alias{expand_cov}
|
||||||
|
\title{Create expanded list of covariance matrices expanded by
|
||||||
|
grid, Sigma_{lk} = omega_l U_k}
|
||||||
|
\usage{
|
||||||
|
expand_cov(Ulist, grid, usepointmass = TRUE)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{Ulist}{a list of covariance matrices}
|
||||||
|
|
||||||
|
\item{grid}{a grid of scalar values by which the covariance
|
||||||
|
matrices are to be scaled}
|
||||||
|
|
||||||
|
\item{usepointmass}{if TRUE adds a point mass at 0 (null component)
|
||||||
|
to the list}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
This takes the covariance matrices in Ulist and multiplies
|
||||||
|
them by the grid values. If usepointmass is TRUE then it adds a null
|
||||||
|
component.
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
This is an internal (non-exported) function. This help
|
||||||
|
page provides additional documentation mainly intended for
|
||||||
|
developers and expert users.
|
||||||
|
}
|
||||||
|
\keyword{internal}
|
||||||
39
man/get_GxE.Rd
Normal file
39
man/get_GxE.Rd
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/mash-evaluation.R
|
||||||
|
\name{get_GxE}
|
||||||
|
\alias{get_GxE}
|
||||||
|
\title{Get data frames of types of GxE from a mash object}
|
||||||
|
\usage{
|
||||||
|
get_GxE(m, factor = 0.4, thresh = 0.05)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{m}{An object of type mash}
|
||||||
|
|
||||||
|
\item{factor}{a number between 0 and 1. The factor within which effects are
|
||||||
|
considered to be shared.}
|
||||||
|
|
||||||
|
\item{thresh}{Numeric. The threshold for including an effect in the assessment}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
A list containing eight data frames. Those with names that start
|
||||||
|
"S_" contain significant effects of different types between pairs of
|
||||||
|
named rows and columns. S_all_pairwise contains all significant effects;
|
||||||
|
NS_pairwise contains all non-significant effects. S_CN contains effects
|
||||||
|
significant in only one condition, and effects with a significantly
|
||||||
|
different magnitude (differential sensitivity). This dataframe is not
|
||||||
|
conservative using the local false sign rate test - we can't determine
|
||||||
|
the sign of one of the effects for effects significant in only one
|
||||||
|
condition - so it's not recommended to use this, but included. S_2_no
|
||||||
|
contains effects significant in both conditions that do not differ
|
||||||
|
significantly in magnitude. These effects do not have GxE. S_AP contains
|
||||||
|
effects significant in both conditions that differ in their sign - and
|
||||||
|
have antagonistic pleiotropy. S_DS contains effects significant in both
|
||||||
|
conditions that differ in the magnitude of their effect, but not their
|
||||||
|
sign - differentially sensitive alleles. S_1_row and S_1_col contain
|
||||||
|
effects that are significant in just one of the two conditions - the row
|
||||||
|
or the column, respectively.
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Performs set operations to determine pairwise GxE for effects
|
||||||
|
from a mash object.
|
||||||
|
}
|
||||||
18
man/get_U_by_mass.Rd
Normal file
18
man/get_U_by_mass.Rd
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/mash-evaluation.R
|
||||||
|
\name{get_U_by_mass}
|
||||||
|
\alias{get_U_by_mass}
|
||||||
|
\title{Get the positions of objects in a mash object Ulist that are above
|
||||||
|
some mass threshold.}
|
||||||
|
\usage{
|
||||||
|
get_U_by_mass(m, thresh = 0.05)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{m}{An object of type mash}
|
||||||
|
|
||||||
|
\item{thresh}{Numeric. The mass threshold for including a covariance matrix}
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Get the positions of objects in a mash object Ulist that are
|
||||||
|
above some mass threshold.
|
||||||
|
}
|
||||||
@@ -2,8 +2,7 @@
|
|||||||
% Please edit documentation in R/wrapper.R
|
% Please edit documentation in R/wrapper.R
|
||||||
\name{get_best_PC_df}
|
\name{get_best_PC_df}
|
||||||
\alias{get_best_PC_df}
|
\alias{get_best_PC_df}
|
||||||
\title{Return best number of PCs in terms of lambda_GC for Panicum virgatum.
|
\title{Return best number of PCs in terms of lambda_GC}
|
||||||
Return best number of PCs in terms of lambda_GC for the CDBN.}
|
|
||||||
\usage{
|
\usage{
|
||||||
get_best_PC_df(df)
|
get_best_PC_df(df)
|
||||||
}
|
}
|
||||||
@@ -16,7 +15,7 @@ A dataframe containing the best lambda_GC value and number of PCs
|
|||||||
for each phenotype in the data frame.
|
for each phenotype in the data frame.
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Given a dataframe created using pvdiv_lambda_GC, this function
|
Given a dataframe created using div_lambda_GC, this function
|
||||||
returns the first lambda_GC less than 1.05, or the smallest lambda_GC,
|
returns the first lambda_GC less than 1.05, or the smallest lambda_GC,
|
||||||
for each column in the dataframe.
|
for each column in the dataframe.
|
||||||
}
|
}
|
||||||
|
|||||||
24
man/get_colnames.Rd
Normal file
24
man/get_colnames.Rd
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/mash-evaluation.R
|
||||||
|
\name{get_colnames}
|
||||||
|
\alias{get_colnames}
|
||||||
|
\title{Get column names from a mash object}
|
||||||
|
\usage{
|
||||||
|
get_colnames(m)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{m}{An object of type mash}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
A vector of phenotype names
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
This function extracts the column names from the local false
|
||||||
|
sign rate table of a mash object's results. This can tell you the condition
|
||||||
|
names or phenotype names used in the mash object. That can be useful for
|
||||||
|
looking at a subset of these columns, say.
|
||||||
|
}
|
||||||
|
\examples{
|
||||||
|
\dontrun{get_colnames(m = mash_obj)}
|
||||||
|
|
||||||
|
}
|
||||||
17
man/get_date_filename.Rd
Normal file
17
man/get_date_filename.Rd
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/mash-evaluation.R
|
||||||
|
\name{get_date_filename}
|
||||||
|
\alias{get_date_filename}
|
||||||
|
\title{Get current date-time in a filename-appropriate format.}
|
||||||
|
\usage{
|
||||||
|
get_date_filename()
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
A string containing the current date-time with spaces and colons
|
||||||
|
replaced with underscores and periods, respectively.
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Converts the current \code{Sys.time()} system time to a format
|
||||||
|
that is acceptable to include in a filename. Changes punctuation that
|
||||||
|
won't work in a filename.
|
||||||
|
}
|
||||||
38
man/get_estimated_pi.Rd
Normal file
38
man/get_estimated_pi.Rd
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/mash-evaluation.R
|
||||||
|
\name{get_estimated_pi}
|
||||||
|
\alias{get_estimated_pi}
|
||||||
|
\title{Return the estimated mixture proportions. Use get_estimated_pi to
|
||||||
|
extract the estimates of the mixture proportions for different types of
|
||||||
|
covariance matrix. This tells you which covariance matrices have most of
|
||||||
|
the mass.}
|
||||||
|
\usage{
|
||||||
|
get_estimated_pi(m, dimension = c("cov", "grid", "all"))
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{m}{the mash result}
|
||||||
|
|
||||||
|
\item{dimension}{indicates whether you want the mixture proportions for the
|
||||||
|
covariances, grid, or all}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
a named vector containing the estimated mixture proportions.
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Return the estimated mixture proportions. Use get_estimated_pi to
|
||||||
|
extract the estimates of the mixture proportions for different types of
|
||||||
|
covariance matrix. This tells you which covariance matrices have most of
|
||||||
|
the mass.
|
||||||
|
}
|
||||||
|
\details{
|
||||||
|
If the fit was done with \code{usepointmass=TRUE} then the first
|
||||||
|
element of the returned vector will correspond to the null, and the
|
||||||
|
remaining elements to the non-null covariance matrices. Suppose the fit
|
||||||
|
was done with $K$ covariances and a grid of length $L$. If
|
||||||
|
\code{dimension=cov} then the returned vector will be of length $K$
|
||||||
|
(or $K+1$ if \code{usepointmass=TRUE}). If \code{dimension=grid} then
|
||||||
|
the returned vector will be of length $L$ (or $L+1$). If
|
||||||
|
\code{dimension=all} then the returned vector will be of length $LK$ (or
|
||||||
|
$LK+1$). The names of the vector will be informative for which
|
||||||
|
combination each element corresponds to.
|
||||||
|
}
|
||||||
21
man/get_log10bf.Rd
Normal file
21
man/get_log10bf.Rd
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/mash-evaluation.R
|
||||||
|
\name{get_log10bf}
|
||||||
|
\alias{get_log10bf}
|
||||||
|
\title{Return the log10 Bayes Factor for each effect}
|
||||||
|
\usage{
|
||||||
|
get_log10bf(m)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{m}{the mash result (from joint or 1by1 analysis); must have been
|
||||||
|
computed using usepointmass = TRUE}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
if m was fitted using usepointmass=TRUE then returns a vector of
|
||||||
|
the log10(bf) values for each effect. That is, the jth element
|
||||||
|
lbf_j is log10(Pr(Bj | g = ghat-nonnull)/Pr(Bj | g = 0)) where
|
||||||
|
ghat-nonnull is the non-null part of ghat. Otherwise returns NULL.
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Return the log10 Bayes Factor for each effect
|
||||||
|
}
|
||||||
18
man/get_marker_df.Rd
Normal file
18
man/get_marker_df.Rd
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/mash-evaluation.R
|
||||||
|
\name{get_marker_df}
|
||||||
|
\alias{get_marker_df}
|
||||||
|
\title{Get mash marker_df}
|
||||||
|
\usage{
|
||||||
|
get_marker_df(m, snp)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{m}{An object of type mash}
|
||||||
|
|
||||||
|
\item{snp}{A bigSNP object, produced by the bigsnpr package. Here, the WAMI
|
||||||
|
SNP information.}
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Pulls SNP markers information in the mash object from a bigsnp
|
||||||
|
object.
|
||||||
|
}
|
||||||
28
man/get_n_significant_conditions.Rd
Normal file
28
man/get_n_significant_conditions.Rd
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/mash-evaluation.R
|
||||||
|
\name{get_n_significant_conditions}
|
||||||
|
\alias{get_n_significant_conditions}
|
||||||
|
\title{Count number of conditions each effect is significant in}
|
||||||
|
\usage{
|
||||||
|
get_n_significant_conditions(
|
||||||
|
m,
|
||||||
|
thresh = 0.05,
|
||||||
|
conditions = NULL,
|
||||||
|
sig_fn = get_lfsr
|
||||||
|
)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{m}{the mash result (from joint or 1by1 analysis)}
|
||||||
|
|
||||||
|
\item{thresh}{indicates the threshold below which to call signals significant}
|
||||||
|
|
||||||
|
\item{conditions}{which conditions to include in check (default to all)}
|
||||||
|
|
||||||
|
\item{sig_fn}{the significance function used to extract significance from mash object; eg could be ashr::get_lfsr or ashr::get_lfdr}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
a vector containing the number of significant conditions
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Count number of conditions each effect is significant in
|
||||||
|
}
|
||||||
14
man/get_ncond.Rd
Normal file
14
man/get_ncond.Rd
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/mash-evaluation.R
|
||||||
|
\name{get_ncond}
|
||||||
|
\alias{get_ncond}
|
||||||
|
\title{Get number of conditions}
|
||||||
|
\usage{
|
||||||
|
get_ncond(m)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{m}{The mash result}
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Get number of conditions
|
||||||
|
}
|
||||||
40
man/get_pairwise_sharing.Rd
Normal file
40
man/get_pairwise_sharing.Rd
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/mash-evaluation.R
|
||||||
|
\name{get_pairwise_sharing}
|
||||||
|
\alias{get_pairwise_sharing}
|
||||||
|
\title{Compute the proportion of (significant) signals shared by magnitude in each pair of conditions, based on the posterior mean}
|
||||||
|
\usage{
|
||||||
|
get_pairwise_sharing(m, factor = 0.5, lfsr_thresh = 0.05, FUN = identity)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{m}{the mash fit}
|
||||||
|
|
||||||
|
\item{factor}{a number between 0 and 1 - the factor within which effects are
|
||||||
|
considered to be shared.}
|
||||||
|
|
||||||
|
\item{lfsr_thresh}{the lfsr threshold for including an effect in the
|
||||||
|
assessment}
|
||||||
|
|
||||||
|
\item{FUN}{a function to be applied to the estimated effect sizes before
|
||||||
|
assessing sharing. The most obvious choice beside the default
|
||||||
|
'FUN=identity' would be 'FUN=abs' if you want to ignore the sign of the
|
||||||
|
effects when assessing sharing.}
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Compute the proportion of (significant) signals shared by magnitude in each pair of conditions, based on the posterior mean
|
||||||
|
}
|
||||||
|
\details{
|
||||||
|
For each pair of tissues, first identify the effects that are
|
||||||
|
significant (by lfsr<lfsr_thresh) in at least one of the two tissues.
|
||||||
|
Then compute what fraction of these have an estimated (posterior mean)
|
||||||
|
effect size within a factor \code{factor} of one another. The results are
|
||||||
|
returned as an R by R matrix.
|
||||||
|
}
|
||||||
|
\examples{
|
||||||
|
\dontrun{
|
||||||
|
get_pairwise_sharing(m) # sharing by magnitude (same sign)
|
||||||
|
get_pairwise_sharing(m, factor=0) # sharing by sign
|
||||||
|
get_pairwise_sharing(m, FUN=abs) # sharing by magnitude when sign is ignored
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
42
man/get_significant_results.Rd
Normal file
42
man/get_significant_results.Rd
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/mash-evaluation.R
|
||||||
|
\name{get_significant_results}
|
||||||
|
\alias{get_significant_results}
|
||||||
|
\title{From a mash result, get effects that are significant in at least
|
||||||
|
one condition.}
|
||||||
|
\usage{
|
||||||
|
get_significant_results(
|
||||||
|
m,
|
||||||
|
thresh = 0.05,
|
||||||
|
conditions = NULL,
|
||||||
|
sig_fn = ashr::get_lfsr
|
||||||
|
)
|
||||||
|
|
||||||
|
get_significant_results(
|
||||||
|
m,
|
||||||
|
thresh = 0.05,
|
||||||
|
conditions = NULL,
|
||||||
|
sig_fn = ashr::get_lfsr
|
||||||
|
)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{m}{the mash result (from joint or 1by1 analysis)}
|
||||||
|
|
||||||
|
\item{thresh}{indicates the threshold below which to call signals significant}
|
||||||
|
|
||||||
|
\item{conditions}{which conditions to include in check (default to all)}
|
||||||
|
|
||||||
|
\item{sig_fn}{the significance function used to extract significance from mash object; eg could be ashr::get_lfsr or ashr::get_lfdr. (Small values must indicate significant.)}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
a vector containing the indices of the significant effects, by
|
||||||
|
order of most significant to least
|
||||||
|
|
||||||
|
a vector containing the indices of the significant effects, by order of most significant to least
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
From a mash result, get effects that are significant in at least
|
||||||
|
one condition.
|
||||||
|
|
||||||
|
From a mash result, get effects that are significant in at least one condition
|
||||||
|
}
|
||||||
35
man/mash_plot_Ulist.Rd
Normal file
35
man/mash_plot_Ulist.Rd
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/mash-evaluation.R
|
||||||
|
\name{mash_plot_Ulist}
|
||||||
|
\alias{mash_plot_Ulist}
|
||||||
|
\title{ggplot of specific covariance matrix patterns}
|
||||||
|
\usage{
|
||||||
|
mash_plot_Ulist(
|
||||||
|
m,
|
||||||
|
range = NA,
|
||||||
|
saveoutput = FALSE,
|
||||||
|
suffix = "",
|
||||||
|
limits = TRUE,
|
||||||
|
labels = TRUE
|
||||||
|
)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{m}{An object of type mash}
|
||||||
|
|
||||||
|
\item{range}{Numeric vector. Which covariance matrices should be plotted?}
|
||||||
|
|
||||||
|
\item{saveoutput}{Logical. Should the output be saved to the path?}
|
||||||
|
|
||||||
|
\item{suffix}{Character. Optional. A unique suffix used to save the files,
|
||||||
|
instead of the current date & time.}
|
||||||
|
|
||||||
|
\item{limits}{should there be plot limits of -1 and 1? Default is true.}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
A list of dataframes used to make the tile plots and the plots
|
||||||
|
themselves.
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Creates a tile plot using ggplot of the covariance matrices
|
||||||
|
specified in the mash model.
|
||||||
|
}
|
||||||
27
man/mash_plot_covar.Rd
Normal file
27
man/mash_plot_covar.Rd
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/mash-evaluation.R
|
||||||
|
\name{mash_plot_covar}
|
||||||
|
\alias{mash_plot_covar}
|
||||||
|
\title{ggplot of covariance matrix masses}
|
||||||
|
\usage{
|
||||||
|
mash_plot_covar(m, saveoutput = FALSE, suffix = "")
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{m}{An object of type mash}
|
||||||
|
|
||||||
|
\item{saveoutput}{Logical. Should the output be saved to the path?}
|
||||||
|
|
||||||
|
\item{suffix}{Character. Optional. A unique suffix used to save the files,
|
||||||
|
instead of the current date & time.}
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Creates a bar plot using ggplot of the masses that are on each
|
||||||
|
covariance matrix specified in the mash model.
|
||||||
|
}
|
||||||
|
\note{
|
||||||
|
This plot can be useful for seeing the overall patterns of effects in
|
||||||
|
the data used in mash. Non-significant effects will add mass to the
|
||||||
|
"no_effects" covariance matrix, while significant effects will add mass
|
||||||
|
to one of the other covariance matrices. You can use mash_plot_Ulist()
|
||||||
|
to plot the covariance matrix patterns themselves.
|
||||||
|
}
|
||||||
47
man/mash_plot_manhattan_by_condition.Rd
Normal file
47
man/mash_plot_manhattan_by_condition.Rd
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/mash-evaluation.R
|
||||||
|
\name{mash_plot_manhattan_by_condition}
|
||||||
|
\alias{mash_plot_manhattan_by_condition}
|
||||||
|
\title{Manhattan plot in ggplot colored by significant conditions}
|
||||||
|
\usage{
|
||||||
|
mash_plot_manhattan_by_condition(
|
||||||
|
m,
|
||||||
|
snp,
|
||||||
|
cond = NA,
|
||||||
|
saveoutput = FALSE,
|
||||||
|
suffix = "",
|
||||||
|
thresh = 0.05
|
||||||
|
)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{m}{A mash object (outputted by mash).}
|
||||||
|
|
||||||
|
\item{snp}{A bigSNP object, produced by the bigsnpr package. Here,
|
||||||
|
the WAMI SNP information.}
|
||||||
|
|
||||||
|
\item{cond}{A vector of phenotypes. Defaults to the names of each
|
||||||
|
column in the mash object.}
|
||||||
|
|
||||||
|
\item{saveoutput}{Logical. Should the output be saved to the path?}
|
||||||
|
|
||||||
|
\item{suffix}{Character. Optional. A unique suffix used to save the files,
|
||||||
|
instead of the current date & time.}
|
||||||
|
|
||||||
|
\item{thresh}{Numeric. The threshold used for the local false sign rate to
|
||||||
|
call significance in a condition.}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
A \code{tbl_df()} of the data used to make the Manhattan plot, and a
|
||||||
|
ggplot object containing the Manhattan.
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Takes a mash object and, for some vector of phenotypes, returns
|
||||||
|
a Manhattan plot ggplot object (and its dataframe). Each SNP in the plot
|
||||||
|
is colored by the number of phenotypes it is significant for. Even and
|
||||||
|
odd chromosomes have different shapes for their SNPs, so that
|
||||||
|
chromosome identity can be determined.
|
||||||
|
}
|
||||||
|
\examples{
|
||||||
|
\dontrun{manhattan_out <- mash_plot_manhattan_by_condition(m = m, snp = snp, saveoutput = TRUE)}
|
||||||
|
|
||||||
|
}
|
||||||
44
man/mash_plot_marker_effect.Rd
Normal file
44
man/mash_plot_marker_effect.Rd
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/mash-evaluation.R
|
||||||
|
\name{mash_plot_marker_effect}
|
||||||
|
\alias{mash_plot_marker_effect}
|
||||||
|
\title{ggplot of single mash effect}
|
||||||
|
\usage{
|
||||||
|
mash_plot_marker_effect(
|
||||||
|
m,
|
||||||
|
snp = snp,
|
||||||
|
n = NA,
|
||||||
|
i = NA,
|
||||||
|
marker = TRUE,
|
||||||
|
saveoutput = FALSE,
|
||||||
|
suffix = ""
|
||||||
|
)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{m}{An object of type mash}
|
||||||
|
|
||||||
|
\item{snp}{A bigSNP object, produced by the bigsnpr package. Here,
|
||||||
|
the WAMI SNP information.}
|
||||||
|
|
||||||
|
\item{n}{Optional. Integer or integer vector. The result number to plot, in
|
||||||
|
order of significance. 1 would be the top result, for example. Find
|
||||||
|
these with \code{\link{get_significant_results}}.}
|
||||||
|
|
||||||
|
\item{i}{Optional. Integer or integer vector. The result number to plot, in
|
||||||
|
the order of the mash object. 1 would be the first marker in the mash
|
||||||
|
object, for example. Find these with \code{\link{get_marker_df}}.}
|
||||||
|
|
||||||
|
\item{marker}{Optional. Print the marker name on the plot?}
|
||||||
|
|
||||||
|
\item{saveoutput}{Logical. Should the output be saved to the path?}
|
||||||
|
|
||||||
|
\item{suffix}{Character. Optional. A unique suffix used to save the files,
|
||||||
|
instead of the current date & time.}
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Creates a plot with point estimates and standard errors for
|
||||||
|
effects of a single SNP in multiple conditions.
|
||||||
|
}
|
||||||
|
\note{
|
||||||
|
Specify only one of n or i.
|
||||||
|
}
|
||||||
45
man/mash_plot_pairwise_sharing.Rd
Normal file
45
man/mash_plot_pairwise_sharing.Rd
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/mash-evaluation.R
|
||||||
|
\name{mash_plot_pairwise_sharing}
|
||||||
|
\alias{mash_plot_pairwise_sharing}
|
||||||
|
\title{Create a ggplot of pairwise sharing of mash effects}
|
||||||
|
\usage{
|
||||||
|
mash_plot_pairwise_sharing(
|
||||||
|
m = NULL,
|
||||||
|
effectRDS = NULL,
|
||||||
|
corrmatrix = NULL,
|
||||||
|
reorder = TRUE,
|
||||||
|
saveoutput = FALSE,
|
||||||
|
filename = NA,
|
||||||
|
suffix = "",
|
||||||
|
...
|
||||||
|
)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{m}{An object of type mash}
|
||||||
|
|
||||||
|
\item{effectRDS}{An RDS containing a correlation matrix.}
|
||||||
|
|
||||||
|
\item{corrmatrix}{A correlation matrix}
|
||||||
|
|
||||||
|
\item{reorder}{Logical. Should the columns be reordered by similarity?}
|
||||||
|
|
||||||
|
\item{saveoutput}{Logical. Should the output be saved to the path?}
|
||||||
|
|
||||||
|
\item{filename}{Character string with an output filename. Optional.}
|
||||||
|
|
||||||
|
\item{suffix}{Character. Optional. A unique suffix used to save the files,
|
||||||
|
instead of the current date & time.}
|
||||||
|
|
||||||
|
\item{...}{Other arguments to \code{\link{get_pairwise_sharing}} or
|
||||||
|
\code{\link{ggcorr}}.}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
A list containing a dataframe containing the correlations and a
|
||||||
|
ggplot2 object containing the correlation plot.
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Given a correlation matrix, an RDS with a correlation matrix, or
|
||||||
|
a mash object, create a ggplot of pairwise sharing of mash effects using
|
||||||
|
\code{\link{get_pairwise_sharing}} and \code{\link{ggcorr}}.
|
||||||
|
}
|
||||||
40
man/mash_plot_sig_by_condition.Rd
Normal file
40
man/mash_plot_sig_by_condition.Rd
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/mash-evaluation.R
|
||||||
|
\name{mash_plot_sig_by_condition}
|
||||||
|
\alias{mash_plot_sig_by_condition}
|
||||||
|
\title{Significant SNPs per number of conditions}
|
||||||
|
\usage{
|
||||||
|
mash_plot_sig_by_condition(
|
||||||
|
m,
|
||||||
|
conditions = NA,
|
||||||
|
saveoutput = FALSE,
|
||||||
|
suffix = "",
|
||||||
|
thresh = 0.05
|
||||||
|
)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{m}{An object of type mash}
|
||||||
|
|
||||||
|
\item{conditions}{A vector of conditions. Get these with get_colnames(m).}
|
||||||
|
|
||||||
|
\item{saveoutput}{Logical. Save plot output to a file? Default is FALSE.}
|
||||||
|
|
||||||
|
\item{suffix}{Character. Optional. A unique suffix used to save the files,
|
||||||
|
instead of the current date & time.}
|
||||||
|
|
||||||
|
\item{thresh}{What is the threshold to call an effect significant? Default
|
||||||
|
is 0.05.}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
A list containing a dataframe of the number of SNPs significant per
|
||||||
|
number of conditions, and a ggplot object using that dataframe.
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
For some number of columns in a mash object that correspond to
|
||||||
|
conditions, find the number of SNPs that are significant for that number
|
||||||
|
of conditions.
|
||||||
|
}
|
||||||
|
\examples{
|
||||||
|
\dontrun{mash_plot_sig_by_condition(m = mash_obj, saveoutput = TRUE)}
|
||||||
|
|
||||||
|
}
|
||||||
11
man/printf2.Rd
Normal file
11
man/printf2.Rd
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/wrapper.R
|
||||||
|
\name{printf2}
|
||||||
|
\alias{printf2}
|
||||||
|
\title{Verbose?}
|
||||||
|
\usage{
|
||||||
|
printf2(verbose, ...)
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Verbose?
|
||||||
|
}
|
||||||
15
man/reorder_cormat.Rd
Normal file
15
man/reorder_cormat.Rd
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/mash-evaluation.R
|
||||||
|
\name{reorder_cormat}
|
||||||
|
\alias{reorder_cormat}
|
||||||
|
\title{Reorder correlation matrix}
|
||||||
|
\usage{
|
||||||
|
reorder_cormat(cormat)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{cormat}{A correlation matrix}
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Reorder correlation coefficients from a matrix of things
|
||||||
|
(including NA's) and hierarchically cluster them
|
||||||
|
}
|
||||||
23
man/scale_cov.Rd
Normal file
23
man/scale_cov.Rd
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/mash-evaluation.R
|
||||||
|
\name{scale_cov}
|
||||||
|
\alias{scale_cov}
|
||||||
|
\title{Scale each covariance matrix in list Ulist by a scalar in
|
||||||
|
vector grid}
|
||||||
|
\usage{
|
||||||
|
scale_cov(Ulist, grid)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{Ulist}{a list of matrices}
|
||||||
|
|
||||||
|
\item{grid}{a vector of scaling factors (standard deviations)}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
a list with length length(Ulist)*length(grid)
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
This is an internal (non-exported) function. This help
|
||||||
|
page provides additional documentation mainly intended for
|
||||||
|
developers and expert users.
|
||||||
|
}
|
||||||
|
\keyword{internal}
|
||||||
Reference in New Issue
Block a user