dive_phe2mash function v1 to test on several plant species
This commit is contained in:
34
man/div_gwas.Rd
Normal file
34
man/div_gwas.Rd
Normal file
@@ -0,0 +1,34 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/wrapper.R
|
||||
\name{div_gwas}
|
||||
\alias{div_gwas}
|
||||
\title{Wrapper for bigsnpr for GWAS}
|
||||
\usage{
|
||||
div_gwas(df, snp, type, svd, npcs)
|
||||
}
|
||||
\arguments{
|
||||
\item{df}{Dataframe of phenotypes where the first column is sample.ID}
|
||||
|
||||
\item{snp}{Genomic information to include for wheat.}
|
||||
|
||||
\item{type}{Character string. Type of univariate regression to run for GWAS.
|
||||
Options are "linear" or "logistic".}
|
||||
|
||||
\item{svd}{Optional covariance matrix to include in the regression. You
|
||||
can generate these using \code{bigsnpr::snp_autoSVD()}.}
|
||||
|
||||
\item{npcs}{Integer. Number of PCs to use for population structure correction.}
|
||||
}
|
||||
\value{
|
||||
The GWAS results for the last phenotype in the dataframe. That
|
||||
phenotype, as well as the remaining phenotypes, are saved as RDS objects
|
||||
in the working directory.
|
||||
}
|
||||
\description{
|
||||
Given a dataframe of phenotypes associated with sample.IDs, this
|
||||
function is a wrapper around bigsnpr functions to conduct linear or
|
||||
logistic regression on wheat. The main advantages of this
|
||||
function over just using the bigsnpr functions is that it automatically
|
||||
removes individual genotypes with missing phenotypic data
|
||||
and that it can run GWAS on multiple phenotypes sequentially.
|
||||
}
|
||||
48
man/div_lambda_GC.Rd
Normal file
48
man/div_lambda_GC.Rd
Normal file
@@ -0,0 +1,48 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/wrapper.R
|
||||
\name{div_lambda_GC}
|
||||
\alias{div_lambda_GC}
|
||||
\title{Return lambda_GC for different numbers of PCs for GWAS on Panicum virgatum.}
|
||||
\usage{
|
||||
div_lambda_GC(
|
||||
df,
|
||||
type = c("linear", "logistic"),
|
||||
snp,
|
||||
svd = NA,
|
||||
ncores = 1,
|
||||
npcs = c(0:10),
|
||||
saveoutput = FALSE
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{df}{Dataframe of phenotypes where the first column is sample.ID and each
|
||||
sample.ID occurs only once in the dataframe.}
|
||||
|
||||
\item{type}{Character string. Type of univariate regression to run for GWAS.
|
||||
Options are "linear" or "logistic".}
|
||||
|
||||
\item{snp}{A bigSNP object with sample.IDs that match the df.}
|
||||
|
||||
\item{svd}{big_SVD object; Covariance matrix to include in the regression.
|
||||
Generate these using \code{bigsnpr::snp_autoSVD()}.}
|
||||
|
||||
\item{ncores}{Number of cores to use. Default is one.}
|
||||
|
||||
\item{npcs}{Integer vector of principal components to use.
|
||||
Defaults to c(0:10).}
|
||||
|
||||
\item{saveoutput}{Logical. Should output be saved as a csv to the
|
||||
working directory?}
|
||||
}
|
||||
\value{
|
||||
A dataframe containing the lambda_GC values for each number of PCs
|
||||
specified. If \code{saveoutput} is TRUE, this is also saved as a .csv file in the working directory.
|
||||
}
|
||||
\description{
|
||||
Given a dataframe of phenotypes associated with sample.IDs and
|
||||
output from a PCA to control for population structure, this function will
|
||||
return a .csv file of the lambda_GC values for the GWAS upon inclusion
|
||||
of different numbers of PCs. This allows the user to choose a number of
|
||||
PCs that returns a lambda_GC close to 1, and thus ensure that they have
|
||||
done adequate correction for population structure.
|
||||
}
|
||||
89
man/dive_phe2mash.Rd
Normal file
89
man/dive_phe2mash.Rd
Normal file
@@ -0,0 +1,89 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/wrapper.R
|
||||
\name{dive_phe2mash}
|
||||
\alias{dive_phe2mash}
|
||||
\title{Wrapper to run mash given a phenotype data frame}
|
||||
\usage{
|
||||
dive_phe2mash(
|
||||
df,
|
||||
snp,
|
||||
type = "linear",
|
||||
svd = NULL,
|
||||
suffix = "",
|
||||
outputdir = ".",
|
||||
min.phe = 200,
|
||||
save.plots = TRUE,
|
||||
thr.r2 = 0.2,
|
||||
thr.m = c("sum", "max"),
|
||||
num.strong = 1000,
|
||||
num.random = NA,
|
||||
scale.phe = TRUE,
|
||||
roll.size = 50,
|
||||
U.ed = NA,
|
||||
U.hyp = NA
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{df}{Dataframe containing phenotypes for mash where the first column is
|
||||
'sample.ID', which should match values in the snp$fam$sample.ID column.}
|
||||
|
||||
\item{snp}{A "bigSNP" object; load with \code{snp_attach()}.}
|
||||
|
||||
\item{type}{Character string, or a character vector the length of the number
|
||||
of phenotypes. Type of univariate regression to run for GWAS.
|
||||
Options are "linear" or "logistic".}
|
||||
|
||||
\item{svd}{A "big_SVD" object; Optional covariance matrix to use for
|
||||
population structure correction.}
|
||||
|
||||
\item{suffix}{Optional character vector to give saved files a unique search string/name.}
|
||||
|
||||
\item{outputdir}{Optional file path to save output files.}
|
||||
|
||||
\item{min.phe}{Integer. Minimum number of individuals phenotyped in order to
|
||||
include that phenotype in GWAS. Default is 200. Use lower values with
|
||||
caution.}
|
||||
|
||||
\item{save.plots}{Logical. Should Manhattan and QQ-plots be generated and
|
||||
saved to the working directory for univariate GWAS? Default is TRUE.}
|
||||
|
||||
\item{thr.r2}{Value between 0 and 1. Threshold of r2 measure of linkage
|
||||
disequilibrium. Markers in higher LD than this will be subset using clumping.}
|
||||
|
||||
\item{thr.m}{"sum" or "max". Type of threshold to use to clump values for
|
||||
mash inputs. "sum" sums the -log10pvalues for each phenotype and uses
|
||||
the maximum of this value as the threshold. "max" uses the maximum
|
||||
-log10pvalue for each SNP across all of the univariate GWAS.}
|
||||
|
||||
\item{num.strong}{Integer. Number of SNPs used to derive data-driven covariance
|
||||
matrix patterns, using markers with strong effects on phenotypes.}
|
||||
|
||||
\item{num.random}{Integer. Number of SNPs used to derive the correlation structure
|
||||
of the null tests, and the mash fit on the null tests.}
|
||||
|
||||
\item{scale.phe}{Logical. Should effects for each phenotype be scaled to fall
|
||||
between -1 and 1? Default is TRUE.}
|
||||
|
||||
\item{roll.size}{Integer. Used to create the svd for GWAS.}
|
||||
|
||||
\item{U.ed}{Mash data-driven covariance matrices. Specify these as a list or a path
|
||||
to a file saved as an .rds. Creating these can be time-consuming, and
|
||||
generating these once and reusing them for multiple mash runs can save time.}
|
||||
|
||||
\item{U.hyp}{Other covariance matrices for mash. Specify these as a list. These
|
||||
matrices must have dimensions that match the number of phenotypes where
|
||||
univariate GWAS ran successfully.}
|
||||
}
|
||||
\value{
|
||||
A mash object made up of all phenotypes where univariate GWAS ran
|
||||
successfully.
|
||||
}
|
||||
\description{
|
||||
Though step-by-step GWAS, preparation of mash inputs, and mash
|
||||
allow you the most flexibility and opportunities to check your results
|
||||
for errors, once those sanity checks are complete, this function allows
|
||||
you to go from a phenotype data.frame of a few phenotypes you want to
|
||||
compare to a mash result. Some exception handling has been built into
|
||||
this function, but the user should stay cautious and skeptical of any
|
||||
results that seem 'too good to be true'.
|
||||
}
|
||||
22
man/get_best_PC_df.Rd
Normal file
22
man/get_best_PC_df.Rd
Normal file
@@ -0,0 +1,22 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/wrapper.R
|
||||
\name{get_best_PC_df}
|
||||
\alias{get_best_PC_df}
|
||||
\title{Return best number of PCs in terms of lambda_GC for Panicum virgatum.
|
||||
Return best number of PCs in terms of lambda_GC for the CDBN.}
|
||||
\usage{
|
||||
get_best_PC_df(df)
|
||||
}
|
||||
\arguments{
|
||||
\item{df}{Dataframe of phenotypes where the first column is NumPCs and
|
||||
each subsequent column contains lambda_GC values for some phenotype.}
|
||||
}
|
||||
\value{
|
||||
A dataframe containing the best lambda_GC value and number of PCs
|
||||
for each phenotype in the data frame.
|
||||
}
|
||||
\description{
|
||||
Given a dataframe created using \code{div_lambda_GC()}, this function
|
||||
returns the first lambda_GC less than 1.05, or the smallest lambda_GC,
|
||||
for each column in the dataframe.
|
||||
}
|
||||
19
man/get_lambdagc.Rd
Normal file
19
man/get_lambdagc.Rd
Normal file
@@ -0,0 +1,19 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/wrapper.R
|
||||
\name{get_lambdagc}
|
||||
\alias{get_lambdagc}
|
||||
\title{Find lambda_GC value for non-NA p-values}
|
||||
\usage{
|
||||
get_lambdagc(ps, tol = 1e-08)
|
||||
}
|
||||
\arguments{
|
||||
\item{ps}{Numeric vector of p-values. Can have NA's.}
|
||||
|
||||
\item{tol}{Numeric. Tolerance for optional Genomic Control coefficient.}
|
||||
}
|
||||
\value{
|
||||
A lambda GC value (some positive number, ideally ~1)
|
||||
}
|
||||
\description{
|
||||
Finds the lambda GC value for some vector of p-values.
|
||||
}
|
||||
31
man/get_qqplot.Rd
Normal file
31
man/get_qqplot.Rd
Normal file
@@ -0,0 +1,31 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/wrapper.R
|
||||
\name{get_qqplot}
|
||||
\alias{get_qqplot}
|
||||
\title{Create a quantile-quantile plot with ggplot2.}
|
||||
\usage{
|
||||
get_qqplot(ps, ci = 0.95, lambdaGC = FALSE, tol = 1e-08)
|
||||
}
|
||||
\arguments{
|
||||
\item{ps}{Numeric vector of p-values.}
|
||||
|
||||
\item{ci}{Numeric. Size of the confidence interval, 0.95 by default.}
|
||||
|
||||
\item{lambdaGC}{Logical. Add the Genomic Control coefficient as subtitle to
|
||||
the plot?}
|
||||
|
||||
\item{tol}{Numeric. Tolerance for optional Genomic Control coefficient.}
|
||||
}
|
||||
\value{
|
||||
A ggplot2 plot.
|
||||
}
|
||||
\description{
|
||||
Assumptions for this quantile-quantile plot:
|
||||
Expected P values are uniformly distributed.
|
||||
Confidence intervals assume independence between tests.
|
||||
We expect deviations past the confidence intervals if the tests are
|
||||
not independent.
|
||||
For example, in a genome-wide association study, the genotype at any
|
||||
position is correlated to nearby positions. Tests of nearby genotypes
|
||||
will result in similar test statistics.
|
||||
}
|
||||
12
man/pipe.Rd
Normal file
12
man/pipe.Rd
Normal file
@@ -0,0 +1,12 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/utils-pipe.R
|
||||
\name{\%>\%}
|
||||
\alias{\%>\%}
|
||||
\title{Pipe operator}
|
||||
\usage{
|
||||
lhs \%>\% rhs
|
||||
}
|
||||
\description{
|
||||
See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
|
||||
}
|
||||
\keyword{internal}
|
||||
20
man/round2.Rd
Normal file
20
man/round2.Rd
Normal file
@@ -0,0 +1,20 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/wrapper.R
|
||||
\name{round2}
|
||||
\alias{round2}
|
||||
\title{Return a number rounded to some number of digits}
|
||||
\usage{
|
||||
round2(x, at)
|
||||
}
|
||||
\arguments{
|
||||
\item{x}{A number or vector of numbers}
|
||||
|
||||
\item{at}{Numeric. Rounding factor or size of the bin to round to.}
|
||||
}
|
||||
\value{
|
||||
A number or vector of numbers
|
||||
}
|
||||
\description{
|
||||
Given some x, return the number rounded to some number of
|
||||
digits.
|
||||
}
|
||||
31
man/round_xy.Rd
Normal file
31
man/round_xy.Rd
Normal file
@@ -0,0 +1,31 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/wrapper.R
|
||||
\name{round_xy}
|
||||
\alias{round_xy}
|
||||
\title{Return a dataframe binned into 2-d bins by some x and y.}
|
||||
\usage{
|
||||
round_xy(x, y, cl = NA, cu = NA, roundby = 0.001)
|
||||
}
|
||||
\arguments{
|
||||
\item{x}{Numeric vector. The first vector for binning.}
|
||||
|
||||
\item{y}{Numeric vector. The second vector for binning.}
|
||||
|
||||
\item{cl}{Numeric vector. Optional confidence interval for the y vector,
|
||||
lower bound.}
|
||||
|
||||
\item{cu}{Numeric vector. Optional confidence interval for the y vector,
|
||||
upper bound.}
|
||||
|
||||
\item{roundby}{Numeric. The amount to round the x and y vectors by for 2d
|
||||
binning.}
|
||||
}
|
||||
\value{
|
||||
A dataframe containing the 2-d binned values for x and y, and their
|
||||
confidence intervals.
|
||||
}
|
||||
\description{
|
||||
Given a dataframe of x and y values (with some optional
|
||||
confidence intervals surrounding the y values), return only the unique
|
||||
values of x and y in some set of 2-d bins.
|
||||
}
|
||||
51
man/tidyeval.Rd
Normal file
51
man/tidyeval.Rd
Normal file
@@ -0,0 +1,51 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/utils-tidy-eval.R
|
||||
\name{tidyeval}
|
||||
\alias{tidyeval}
|
||||
\alias{expr}
|
||||
\alias{enquo}
|
||||
\alias{enquos}
|
||||
\alias{sym}
|
||||
\alias{syms}
|
||||
\alias{.data}
|
||||
\alias{:=}
|
||||
\alias{as_name}
|
||||
\alias{as_label}
|
||||
\title{Tidy eval helpers}
|
||||
\description{
|
||||
\itemize{
|
||||
\item \code{\link[rlang]{sym}()} creates a symbol from a string and
|
||||
\code{\link[rlang:sym]{syms}()} creates a list of symbols from a
|
||||
character vector.
|
||||
\item \code{\link[rlang:nse-defuse]{enquo}()} and
|
||||
\code{\link[rlang:nse-defuse]{enquos}()} delay the execution of one or
|
||||
several function arguments. \code{enquo()} returns a single quoted
|
||||
expression, which is like a blueprint for the delayed computation.
|
||||
\code{enquos()} returns a list of such quoted expressions.
|
||||
\item \code{\link[rlang:nse-defuse]{expr}()} quotes a new expression \emph{locally}. It
|
||||
is mostly useful to build new expressions around arguments
|
||||
captured with \code{\link[=enquo]{enquo()}} or \code{\link[=enquos]{enquos()}}:
|
||||
\code{expr(mean(!!enquo(arg), na.rm = TRUE))}.
|
||||
\item \code{\link[rlang]{as_name}()} transforms a quoted variable name
|
||||
into a string. Supplying something else than a quoted variable
|
||||
name is an error.
|
||||
|
||||
That's unlike \code{\link[rlang]{as_label}()} which also returns
|
||||
a single string but supports any kind of R object as input,
|
||||
including quoted function calls and vectors. Its purpose is to
|
||||
summarise that object into a single label. That label is often
|
||||
suitable as a default name.
|
||||
|
||||
If you don't know what a quoted expression contains (for instance
|
||||
expressions captured with \code{enquo()} could be a variable
|
||||
name, a call to a function, or an unquoted constant), then use
|
||||
\code{as_label()}. If you know you have quoted a simple variable
|
||||
name, or would like to enforce this, use \code{as_name()}.
|
||||
}
|
||||
|
||||
To learn more about tidy eval and how to use these tools, visit
|
||||
\url{https://tidyeval.tidyverse.org} and the
|
||||
\href{https://adv-r.hadley.nz/metaprogramming.html}{Metaprogramming
|
||||
section} of \href{https://adv-r.hadley.nz}{Advanced R}.
|
||||
}
|
||||
\keyword{internal}
|
||||
Reference in New Issue
Block a user