From 027323acf65ec85563230d001b7548a6a6134712 Mon Sep 17 00:00:00 2001 From: Alice MacQueen Date: Tue, 30 Mar 2021 18:33:06 -0500 Subject: [PATCH] dive_phe2mash function v1 to test on several plant species --- .Rbuildignore | 6 + .gitignore | 5 + DESCRIPTION | 33 ++ LICENSE | 339 ------------------ LICENSE.md | 595 ++++++++++++++++++++++++++++++ NAMESPACE | 69 ++++ R/snpdiver-package.R | 4 + R/utils-pipe.R | 11 + R/utils-tidy-eval.R | 47 +++ R/wrapper.R | 814 ++++++++++++++++++++++++++++++++++++++++++ _pkgdown.yml | 1 + man/div_gwas.Rd | 34 ++ man/div_lambda_GC.Rd | 48 +++ man/dive_phe2mash.Rd | 89 +++++ man/get_best_PC_df.Rd | 22 ++ man/get_lambdagc.Rd | 19 + man/get_qqplot.Rd | 31 ++ man/pipe.Rd | 12 + man/round2.Rd | 20 ++ man/round_xy.Rd | 31 ++ man/tidyeval.Rd | 51 +++ snpdiver.Rproj | 22 ++ 22 files changed, 1964 insertions(+), 339 deletions(-) create mode 100644 .Rbuildignore create mode 100644 DESCRIPTION delete mode 100644 LICENSE create mode 100644 LICENSE.md create mode 100644 NAMESPACE create mode 100644 R/snpdiver-package.R create mode 100644 R/utils-pipe.R create mode 100644 R/utils-tidy-eval.R create mode 100644 R/wrapper.R create mode 100644 _pkgdown.yml create mode 100644 man/div_gwas.Rd create mode 100644 man/div_lambda_GC.Rd create mode 100644 man/dive_phe2mash.Rd create mode 100644 man/get_best_PC_df.Rd create mode 100644 man/get_lambdagc.Rd create mode 100644 man/get_qqplot.Rd create mode 100644 man/pipe.Rd create mode 100644 man/round2.Rd create mode 100644 man/round_xy.Rd create mode 100644 man/tidyeval.Rd create mode 100644 snpdiver.Rproj diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..98f8d44 --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,6 @@ +^snpdiver\.Rproj$ +^\.Rproj\.user$ +^_pkgdown\.yml$ +^docs$ +^pkgdown$ +^LICENSE\.md$ diff --git a/.gitignore b/.gitignore index fae8299..79b84c2 100644 --- a/.gitignore +++ b/.gitignore @@ -37,3 +37,8 @@ vignettes/*.pdf # R Environment Variables .Renviron +.Rproj.user +docs + +# Test files manually produced when checking functions +tests/ diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..8ce25de --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,33 @@ +Package: snpdiver +Title: Explore SNP Effects on Multiple Phenotypes +Version: 0.0.0.9000 +Authors@R: + person(given = "Alice", + family = "MacQueen", + role = c("aut", "cre"), + email = "alice.macqueen@gmail.com", + comment = c(ORCID = "0000-0002-4606-1832")) +Description: What the package does (one paragraph). +License: GPL (>= 3) +Encoding: UTF-8 +LazyData: true +Roxygen: list(markdown = TRUE) +RoxygenNote: 7.1.1 +Imports: + ashr, + bigsnpr, + bigstatsr, + cowplot, + dplyr, + ggplot2, + mashr, + magrittr, + matrixStats, + purrr, + readr, + rlang (>= 0.1.2), + tibble, + tidyr, + tidyselect +Suggests: + roxygen2 diff --git a/LICENSE b/LICENSE deleted file mode 100644 index d159169..0000000 --- a/LICENSE +++ /dev/null @@ -1,339 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Lesser General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. - - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. - -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 7. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) year name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - , 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..175443c --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,595 @@ +GNU General Public License +========================== + +_Version 3, 29 June 2007_ +_Copyright © 2007 Free Software Foundation, Inc. <>_ + +Everyone is permitted to copy and distribute verbatim copies of this license +document, but changing it is not allowed. + +## Preamble + +The GNU General Public License is a free, copyleft license for software and other +kinds of works. + +The licenses for most software and other practical works are designed to take away +your freedom to share and change the works. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change all versions of a +program--to make sure it remains free software for all its users. We, the Free +Software Foundation, use the GNU General Public License for most of our software; it +applies also to any other work released this way by its authors. You can apply it to +your programs, too. + +When we speak of free software, we are referring to freedom, not price. Our General +Public Licenses are designed to make sure that you have the freedom to distribute +copies of free software (and charge for them if you wish), that you receive source +code or can get it if you want it, that you can change the software or use pieces of +it in new free programs, and that you know you can do these things. + +To protect your rights, we need to prevent others from denying you these rights or +asking you to surrender the rights. Therefore, you have certain responsibilities if +you distribute copies of the software, or if you modify it: responsibilities to +respect the freedom of others. + +For example, if you distribute copies of such a program, whether gratis or for a fee, +you must pass on to the recipients the same freedoms that you received. You must make +sure that they, too, receive or can get the source code. And you must show them these +terms so they know their rights. + +Developers that use the GNU GPL protect your rights with two steps: **(1)** assert +copyright on the software, and **(2)** offer you this License giving you legal permission +to copy, distribute and/or modify it. + +For the developers' and authors' protection, the GPL clearly explains that there is +no warranty for this free software. For both users' and authors' sake, the GPL +requires that modified versions be marked as changed, so that their problems will not +be attributed erroneously to authors of previous versions. + +Some devices are designed to deny users access to install or run modified versions of +the software inside them, although the manufacturer can do so. This is fundamentally +incompatible with the aim of protecting users' freedom to change the software. The +systematic pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we have designed +this version of the GPL to prohibit the practice for those products. If such problems +arise substantially in other domains, we stand ready to extend this provision to +those domains in future versions of the GPL, as needed to protect the freedom of +users. + +Finally, every program is threatened constantly by software patents. States should +not allow patents to restrict development and use of software on general-purpose +computers, but in those that do, we wish to avoid the special danger that patents +applied to a free program could make it effectively proprietary. To prevent this, the +GPL assures that patents cannot be used to render the program non-free. + +The precise terms and conditions for copying, distribution and modification follow. + +## TERMS AND CONDITIONS + +### 0. Definitions + +“This License” refers to version 3 of the GNU General Public License. + +“Copyright” also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + +“The Program” refers to any copyrightable work licensed under this +License. Each licensee is addressed as “you”. “Licensees” and +“recipients” may be individuals or organizations. + +To “modify” a work means to copy from or adapt all or part of the work in +a fashion requiring copyright permission, other than the making of an exact copy. The +resulting work is called a “modified version” of the earlier work or a +work “based on” the earlier work. + +A “covered work” means either the unmodified Program or a work based on +the Program. + +To “propagate” a work means to do anything with it that, without +permission, would make you directly or secondarily liable for infringement under +applicable copyright law, except executing it on a computer or modifying a private +copy. Propagation includes copying, distribution (with or without modification), +making available to the public, and in some countries other activities as well. + +To “convey” a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through a computer +network, with no transfer of a copy, is not conveying. + +An interactive user interface displays “Appropriate Legal Notices” to the +extent that it includes a convenient and prominently visible feature that **(1)** +displays an appropriate copyright notice, and **(2)** tells the user that there is no +warranty for the work (except to the extent that warranties are provided), that +licensees may convey the work under this License, and how to view a copy of this +License. If the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + +### 1. Source Code + +The “source code” for a work means the preferred form of the work for +making modifications to it. “Object code” means any non-source form of a +work. + +A “Standard Interface” means an interface that either is an official +standard defined by a recognized standards body, or, in the case of interfaces +specified for a particular programming language, one that is widely used among +developers working in that language. + +The “System Libraries” of an executable work include anything, other than +the work as a whole, that **(a)** is included in the normal form of packaging a Major +Component, but which is not part of that Major Component, and **(b)** serves only to +enable use of the work with that Major Component, or to implement a Standard +Interface for which an implementation is available to the public in source code form. +A “Major Component”, in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system (if any) on which +the executable work runs, or a compiler used to produce the work, or an object code +interpreter used to run it. + +The “Corresponding Source” for a work in object code form means all the +source code needed to generate, install, and (for an executable work) run the object +code and to modify the work, including scripts to control those activities. However, +it does not include the work's System Libraries, or general-purpose tools or +generally available free programs which are used unmodified in performing those +activities but which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for the work, and +the source code for shared libraries and dynamically linked subprograms that the work +is specifically designed to require, such as by intimate data communication or +control flow between those subprograms and other parts of the work. + +The Corresponding Source need not include anything that users can regenerate +automatically from other parts of the Corresponding Source. + +The Corresponding Source for a work in source code form is that same work. + +### 2. Basic Permissions + +All rights granted under this License are granted for the term of copyright on the +Program, and are irrevocable provided the stated conditions are met. This License +explicitly affirms your unlimited permission to run the unmodified Program. The +output from running a covered work is covered by this License only if the output, +given its content, constitutes a covered work. This License acknowledges your rights +of fair use or other equivalent, as provided by copyright law. + +You may make, run and propagate covered works that you do not convey, without +conditions so long as your license otherwise remains in force. You may convey covered +works to others for the sole purpose of having them make modifications exclusively +for you, or provide you with facilities for running those works, provided that you +comply with the terms of this License in conveying all material for which you do not +control copyright. Those thus making or running the covered works for you must do so +exclusively on your behalf, under your direction and control, on terms that prohibit +them from making any copies of your copyrighted material outside their relationship +with you. + +Conveying under any other circumstances is permitted solely under the conditions +stated below. Sublicensing is not allowed; section 10 makes it unnecessary. + +### 3. Protecting Users' Legal Rights From Anti-Circumvention Law + +No covered work shall be deemed part of an effective technological measure under any +applicable law fulfilling obligations under article 11 of the WIPO copyright treaty +adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention +of such measures. + +When you convey a covered work, you waive any legal power to forbid circumvention of +technological measures to the extent such circumvention is effected by exercising +rights under this License with respect to the covered work, and you disclaim any +intention to limit operation or modification of the work as a means of enforcing, +against the work's users, your or third parties' legal rights to forbid circumvention +of technological measures. + +### 4. Conveying Verbatim Copies + +You may convey verbatim copies of the Program's source code as you receive it, in any +medium, provided that you conspicuously and appropriately publish on each copy an +appropriate copyright notice; keep intact all notices stating that this License and +any non-permissive terms added in accord with section 7 apply to the code; keep +intact all notices of the absence of any warranty; and give all recipients a copy of +this License along with the Program. + +You may charge any price or no price for each copy that you convey, and you may offer +support or warranty protection for a fee. + +### 5. Conveying Modified Source Versions + +You may convey a work based on the Program, or the modifications to produce it from +the Program, in the form of source code under the terms of section 4, provided that +you also meet all of these conditions: + +* **a)** The work must carry prominent notices stating that you modified it, and giving a +relevant date. +* **b)** The work must carry prominent notices stating that it is released under this +License and any conditions added under section 7. This requirement modifies the +requirement in section 4 to “keep intact all notices”. +* **c)** You must license the entire work, as a whole, under this License to anyone who +comes into possession of a copy. This License will therefore apply, along with any +applicable section 7 additional terms, to the whole of the work, and all its parts, +regardless of how they are packaged. This License gives no permission to license the +work in any other way, but it does not invalidate such permission if you have +separately received it. +* **d)** If the work has interactive user interfaces, each must display Appropriate Legal +Notices; however, if the Program has interactive interfaces that do not display +Appropriate Legal Notices, your work need not make them do so. + +A compilation of a covered work with other separate and independent works, which are +not by their nature extensions of the covered work, and which are not combined with +it such as to form a larger program, in or on a volume of a storage or distribution +medium, is called an “aggregate” if the compilation and its resulting +copyright are not used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work in an aggregate +does not cause this License to apply to the other parts of the aggregate. + +### 6. Conveying Non-Source Forms + +You may convey a covered work in object code form under the terms of sections 4 and +5, provided that you also convey the machine-readable Corresponding Source under the +terms of this License, in one of these ways: + +* **a)** Convey the object code in, or embodied in, a physical product (including a +physical distribution medium), accompanied by the Corresponding Source fixed on a +durable physical medium customarily used for software interchange. +* **b)** Convey the object code in, or embodied in, a physical product (including a +physical distribution medium), accompanied by a written offer, valid for at least +three years and valid for as long as you offer spare parts or customer support for +that product model, to give anyone who possesses the object code either **(1)** a copy of +the Corresponding Source for all the software in the product that is covered by this +License, on a durable physical medium customarily used for software interchange, for +a price no more than your reasonable cost of physically performing this conveying of +source, or **(2)** access to copy the Corresponding Source from a network server at no +charge. +* **c)** Convey individual copies of the object code with a copy of the written offer to +provide the Corresponding Source. This alternative is allowed only occasionally and +noncommercially, and only if you received the object code with such an offer, in +accord with subsection 6b. +* **d)** Convey the object code by offering access from a designated place (gratis or for +a charge), and offer equivalent access to the Corresponding Source in the same way +through the same place at no further charge. You need not require recipients to copy +the Corresponding Source along with the object code. If the place to copy the object +code is a network server, the Corresponding Source may be on a different server +(operated by you or a third party) that supports equivalent copying facilities, +provided you maintain clear directions next to the object code saying where to find +the Corresponding Source. Regardless of what server hosts the Corresponding Source, +you remain obligated to ensure that it is available for as long as needed to satisfy +these requirements. +* **e)** Convey the object code using peer-to-peer transmission, provided you inform +other peers where the object code and Corresponding Source of the work are being +offered to the general public at no charge under subsection 6d. + +A separable portion of the object code, whose source code is excluded from the +Corresponding Source as a System Library, need not be included in conveying the +object code work. + +A “User Product” is either **(1)** a “consumer product”, which +means any tangible personal property which is normally used for personal, family, or +household purposes, or **(2)** anything designed or sold for incorporation into a +dwelling. In determining whether a product is a consumer product, doubtful cases +shall be resolved in favor of coverage. For a particular product received by a +particular user, “normally used” refers to a typical or common use of +that class of product, regardless of the status of the particular user or of the way +in which the particular user actually uses, or expects or is expected to use, the +product. A product is a consumer product regardless of whether the product has +substantial commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + +“Installation Information” for a User Product means any methods, +procedures, authorization keys, or other information required to install and execute +modified versions of a covered work in that User Product from a modified version of +its Corresponding Source. The information must suffice to ensure that the continued +functioning of the modified object code is in no case prevented or interfered with +solely because modification has been made. + +If you convey an object code work under this section in, or with, or specifically for +use in, a User Product, and the conveying occurs as part of a transaction in which +the right of possession and use of the User Product is transferred to the recipient +in perpetuity or for a fixed term (regardless of how the transaction is +characterized), the Corresponding Source conveyed under this section must be +accompanied by the Installation Information. But this requirement does not apply if +neither you nor any third party retains the ability to install modified object code +on the User Product (for example, the work has been installed in ROM). + +The requirement to provide Installation Information does not include a requirement to +continue to provide support service, warranty, or updates for a work that has been +modified or installed by the recipient, or for the User Product in which it has been +modified or installed. Access to a network may be denied when the modification itself +materially and adversely affects the operation of the network or violates the rules +and protocols for communication across the network. + +Corresponding Source conveyed, and Installation Information provided, in accord with +this section must be in a format that is publicly documented (and with an +implementation available to the public in source code form), and must require no +special password or key for unpacking, reading or copying. + +### 7. Additional Terms + +“Additional permissions” are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. Additional +permissions that are applicable to the entire Program shall be treated as though they +were included in this License, to the extent that they are valid under applicable +law. If additional permissions apply only to part of the Program, that part may be +used separately under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + +When you convey a copy of a covered work, you may at your option remove any +additional permissions from that copy, or from any part of it. (Additional +permissions may be written to require their own removal in certain cases when you +modify the work.) You may place additional permissions on material, added by you to a +covered work, for which you have or can give appropriate copyright permission. + +Notwithstanding any other provision of this License, for material you add to a +covered work, you may (if authorized by the copyright holders of that material) +supplement the terms of this License with terms: + +* **a)** Disclaiming warranty or limiting liability differently from the terms of +sections 15 and 16 of this License; or +* **b)** Requiring preservation of specified reasonable legal notices or author +attributions in that material or in the Appropriate Legal Notices displayed by works +containing it; or +* **c)** Prohibiting misrepresentation of the origin of that material, or requiring that +modified versions of such material be marked in reasonable ways as different from the +original version; or +* **d)** Limiting the use for publicity purposes of names of licensors or authors of the +material; or +* **e)** Declining to grant rights under trademark law for use of some trade names, +trademarks, or service marks; or +* **f)** Requiring indemnification of licensors and authors of that material by anyone +who conveys the material (or modified versions of it) with contractual assumptions of +liability to the recipient, for any liability that these contractual assumptions +directly impose on those licensors and authors. + +All other non-permissive additional terms are considered “further +restrictions” within the meaning of section 10. If the Program as you received +it, or any part of it, contains a notice stating that it is governed by this License +along with a term that is a further restriction, you may remove that term. If a +license document contains a further restriction but permits relicensing or conveying +under this License, you may add to a covered work material governed by the terms of +that license document, provided that the further restriction does not survive such +relicensing or conveying. + +If you add terms to a covered work in accord with this section, you must place, in +the relevant source files, a statement of the additional terms that apply to those +files, or a notice indicating where to find the applicable terms. + +Additional terms, permissive or non-permissive, may be stated in the form of a +separately written license, or stated as exceptions; the above requirements apply +either way. + +### 8. Termination + +You may not propagate or modify a covered work except as expressly provided under +this License. Any attempt otherwise to propagate or modify it is void, and will +automatically terminate your rights under this License (including any patent licenses +granted under the third paragraph of section 11). + +However, if you cease all violation of this License, then your license from a +particular copyright holder is reinstated **(a)** provisionally, unless and until the +copyright holder explicitly and finally terminates your license, and **(b)** permanently, +if the copyright holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + +Moreover, your license from a particular copyright holder is reinstated permanently +if the copyright holder notifies you of the violation by some reasonable means, this +is the first time you have received notice of violation of this License (for any +work) from that copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + +Termination of your rights under this section does not terminate the licenses of +parties who have received copies or rights from you under this License. If your +rights have been terminated and not permanently reinstated, you do not qualify to +receive new licenses for the same material under section 10. + +### 9. Acceptance Not Required for Having Copies + +You are not required to accept this License in order to receive or run a copy of the +Program. Ancillary propagation of a covered work occurring solely as a consequence of +using peer-to-peer transmission to receive a copy likewise does not require +acceptance. However, nothing other than this License grants you permission to +propagate or modify any covered work. These actions infringe copyright if you do not +accept this License. Therefore, by modifying or propagating a covered work, you +indicate your acceptance of this License to do so. + +### 10. Automatic Licensing of Downstream Recipients + +Each time you convey a covered work, the recipient automatically receives a license +from the original licensors, to run, modify and propagate that work, subject to this +License. You are not responsible for enforcing compliance by third parties with this +License. + +An “entity transaction” is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an organization, or +merging organizations. If propagation of a covered work results from an entity +transaction, each party to that transaction who receives a copy of the work also +receives whatever licenses to the work the party's predecessor in interest had or +could give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if the predecessor +has it or can get it with reasonable efforts. + +You may not impose any further restrictions on the exercise of the rights granted or +affirmed under this License. For example, you may not impose a license fee, royalty, +or other charge for exercise of rights granted under this License, and you may not +initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging +that any patent claim is infringed by making, using, selling, offering for sale, or +importing the Program or any portion of it. + +### 11. Patents + +A “contributor” is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The work thus +licensed is called the contributor's “contributor version”. + +A contributor's “essential patent claims” are all patent claims owned or +controlled by the contributor, whether already acquired or hereafter acquired, that +would be infringed by some manner, permitted by this License, of making, using, or +selling its contributor version, but do not include claims that would be infringed +only as a consequence of further modification of the contributor version. For +purposes of this definition, “control” includes the right to grant patent +sublicenses in a manner consistent with the requirements of this License. + +Each contributor grants you a non-exclusive, worldwide, royalty-free patent license +under the contributor's essential patent claims, to make, use, sell, offer for sale, +import and otherwise run, modify and propagate the contents of its contributor +version. + +In the following three paragraphs, a “patent license” is any express +agreement or commitment, however denominated, not to enforce a patent (such as an +express permission to practice a patent or covenant not to sue for patent +infringement). To “grant” such a patent license to a party means to make +such an agreement or commitment not to enforce a patent against the party. + +If you convey a covered work, knowingly relying on a patent license, and the +Corresponding Source of the work is not available for anyone to copy, free of charge +and under the terms of this License, through a publicly available network server or +other readily accessible means, then you must either **(1)** cause the Corresponding +Source to be so available, or **(2)** arrange to deprive yourself of the benefit of the +patent license for this particular work, or **(3)** arrange, in a manner consistent with +the requirements of this License, to extend the patent license to downstream +recipients. “Knowingly relying” means you have actual knowledge that, but +for the patent license, your conveying the covered work in a country, or your +recipient's use of the covered work in a country, would infringe one or more +identifiable patents in that country that you have reason to believe are valid. + +If, pursuant to or in connection with a single transaction or arrangement, you +convey, or propagate by procuring conveyance of, a covered work, and grant a patent +license to some of the parties receiving the covered work authorizing them to use, +propagate, modify or convey a specific copy of the covered work, then the patent +license you grant is automatically extended to all recipients of the covered work and +works based on it. + +A patent license is “discriminatory” if it does not include within the +scope of its coverage, prohibits the exercise of, or is conditioned on the +non-exercise of one or more of the rights that are specifically granted under this +License. You may not convey a covered work if you are a party to an arrangement with +a third party that is in the business of distributing software, under which you make +payment to the third party based on the extent of your activity of conveying the +work, and under which the third party grants, to any of the parties who would receive +the covered work from you, a discriminatory patent license **(a)** in connection with +copies of the covered work conveyed by you (or copies made from those copies), or **(b)** +primarily for and in connection with specific products or compilations that contain +the covered work, unless you entered into that arrangement, or that patent license +was granted, prior to 28 March 2007. + +Nothing in this License shall be construed as excluding or limiting any implied +license or other defenses to infringement that may otherwise be available to you +under applicable patent law. + +### 12. No Surrender of Others' Freedom + +If conditions are imposed on you (whether by court order, agreement or otherwise) +that contradict the conditions of this License, they do not excuse you from the +conditions of this License. If you cannot convey a covered work so as to satisfy +simultaneously your obligations under this License and any other pertinent +obligations, then as a consequence you may not convey it at all. For example, if you +agree to terms that obligate you to collect a royalty for further conveying from +those to whom you convey the Program, the only way you could satisfy both those terms +and this License would be to refrain entirely from conveying the Program. + +### 13. Use with the GNU Affero General Public License + +Notwithstanding any other provision of this License, you have permission to link or +combine any covered work with a work licensed under version 3 of the GNU Affero +General Public License into a single combined work, and to convey the resulting work. +The terms of this License will continue to apply to the part which is the covered +work, but the special requirements of the GNU Affero General Public License, section +13, concerning interaction through a network will apply to the combination as such. + +### 14. Revised Versions of this License + +The Free Software Foundation may publish revised and/or new versions of the GNU +General Public License from time to time. Such new versions will be similar in spirit +to the present version, but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Program specifies that +a certain numbered version of the GNU General Public License “or any later +version” applies to it, you have the option of following the terms and +conditions either of that numbered version or of any later version published by the +Free Software Foundation. If the Program does not specify a version number of the GNU +General Public License, you may choose any version ever published by the Free +Software Foundation. + +If the Program specifies that a proxy can decide which future versions of the GNU +General Public License can be used, that proxy's public statement of acceptance of a +version permanently authorizes you to choose that version for the Program. + +Later license versions may give you additional or different permissions. However, no +additional obligations are imposed on any author or copyright holder as a result of +your choosing to follow a later version. + +### 15. Disclaimer of Warranty + +THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM “AS IS” WITHOUT WARRANTY OF ANY KIND, EITHER +EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE +QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE +DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +### 16. Limitation of Liability + +IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY +COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS +PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, +INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE +OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE +WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + +### 17. Interpretation of Sections 15 and 16 + +If the disclaimer of warranty and limitation of liability provided above cannot be +given local legal effect according to their terms, reviewing courts shall apply local +law that most closely approximates an absolute waiver of all civil liability in +connection with the Program, unless a warranty or assumption of liability accompanies +a copy of the Program in return for a fee. + +_END OF TERMS AND CONDITIONS_ + +## How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest possible use to +the public, the best way to achieve this is to make it free software which everyone +can redistribute and change under these terms. + +To do so, attach the following notices to the program. It is safest to attach them +to the start of each source file to most effectively state the exclusion of warranty; +and each file should have at least the “copyright” line and a pointer to +where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + +If the program does terminal interaction, make it output a short notice like this +when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type 'show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type 'show c' for details. + +The hypothetical commands `show w` and `show c` should show the appropriate parts of +the General Public License. Of course, your program's commands might be different; +for a GUI interface, you would use an “about box”. + +You should also get your employer (if you work as a programmer) or school, if any, to +sign a “copyright disclaimer” for the program, if necessary. For more +information on this, and how to apply and follow the GNU GPL, see +<>. + +The GNU General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may consider it +more useful to permit linking proprietary applications with the library. If this is +what you want to do, use the GNU Lesser General Public License instead of this +License. But first, please read +<>. diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..0ec7dd1 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,69 @@ +# Generated by roxygen2: do not edit by hand + +export("%>%") +export(":=") +export(.data) +export(as_label) +export(as_name) +export(div_gwas) +export(div_lambda_GC) +export(dive_phe2mash) +export(enquo) +export(enquos) +export(expr) +export(get_lambdagc) +export(get_qqplot) +export(sym) +export(syms) +import(bigsnpr) +import(bigstatsr) +import(ggplot2) +import(mashr) +importFrom(ashr,get_fitted_g) +importFrom(bigsnpr,snp_autoSVD) +importFrom(cowplot,save_plot) +importFrom(dplyr,arrange) +importFrom(dplyr,case_when) +importFrom(dplyr,filter) +importFrom(dplyr,full_join) +importFrom(dplyr,group_by) +importFrom(dplyr,left_join) +importFrom(dplyr,mutate) +importFrom(dplyr,mutate_if) +importFrom(dplyr,rename) +importFrom(dplyr,select) +importFrom(dplyr,slice) +importFrom(dplyr,slice_max) +importFrom(dplyr,slice_sample) +importFrom(dplyr,summarise) +importFrom(dplyr,top_n) +importFrom(magrittr,"%>%") +importFrom(matrixStats,colMaxs) +importFrom(matrixStats,rowMaxs) +importFrom(purrr,as_vector) +importFrom(readr,write_csv) +importFrom(rlang,"!!") +importFrom(rlang,":=") +importFrom(rlang,.data) +importFrom(rlang,as_label) +importFrom(rlang,as_name) +importFrom(rlang,enquo) +importFrom(rlang,enquos) +importFrom(rlang,expr) +importFrom(rlang,sym) +importFrom(rlang,syms) +importFrom(stats,median) +importFrom(stats,ppoints) +importFrom(stats,predict) +importFrom(stats,qbeta) +importFrom(stats,uniroot) +importFrom(tibble,add_column) +importFrom(tibble,add_row) +importFrom(tibble,as_tibble) +importFrom(tibble,enframe) +importFrom(tibble,rownames_to_column) +importFrom(tibble,tibble) +importFrom(tidyr,gather) +importFrom(tidyr,replace_na) +importFrom(tidyselect,all_of) +importFrom(utils,tail) diff --git a/R/snpdiver-package.R b/R/snpdiver-package.R new file mode 100644 index 0000000..6161ea4 --- /dev/null +++ b/R/snpdiver-package.R @@ -0,0 +1,4 @@ +## usethis namespace: start +#' @importFrom tibble tibble +## usethis namespace: end +NULL diff --git a/R/utils-pipe.R b/R/utils-pipe.R new file mode 100644 index 0000000..e79f3d8 --- /dev/null +++ b/R/utils-pipe.R @@ -0,0 +1,11 @@ +#' Pipe operator +#' +#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. +#' +#' @name %>% +#' @rdname pipe +#' @keywords internal +#' @export +#' @importFrom magrittr %>% +#' @usage lhs \%>\% rhs +NULL diff --git a/R/utils-tidy-eval.R b/R/utils-tidy-eval.R new file mode 100644 index 0000000..b4e6c7f --- /dev/null +++ b/R/utils-tidy-eval.R @@ -0,0 +1,47 @@ +#' Tidy eval helpers +#' +#' @description +#' +#' * \code{\link[rlang]{sym}()} creates a symbol from a string and +#' \code{\link[rlang:sym]{syms}()} creates a list of symbols from a +#' character vector. +#' +#' * \code{\link[rlang:nse-defuse]{enquo}()} and +#' \code{\link[rlang:nse-defuse]{enquos}()} delay the execution of one or +#' several function arguments. \code{enquo()} returns a single quoted +#' expression, which is like a blueprint for the delayed computation. +#' \code{enquos()} returns a list of such quoted expressions. +#' +#' * \code{\link[rlang:nse-defuse]{expr}()} quotes a new expression _locally_. It +#' is mostly useful to build new expressions around arguments +#' captured with [enquo()] or [enquos()]: +#' \code{expr(mean(!!enquo(arg), na.rm = TRUE))}. +#' +#' * \code{\link[rlang]{as_name}()} transforms a quoted variable name +#' into a string. Supplying something else than a quoted variable +#' name is an error. +#' +#' That's unlike \code{\link[rlang]{as_label}()} which also returns +#' a single string but supports any kind of R object as input, +#' including quoted function calls and vectors. Its purpose is to +#' summarise that object into a single label. That label is often +#' suitable as a default name. +#' +#' If you don't know what a quoted expression contains (for instance +#' expressions captured with \code{enquo()} could be a variable +#' name, a call to a function, or an unquoted constant), then use +#' \code{as_label()}. If you know you have quoted a simple variable +#' name, or would like to enforce this, use \code{as_name()}. +#' +#' To learn more about tidy eval and how to use these tools, visit +#' \url{https://tidyeval.tidyverse.org} and the +#' \href{https://adv-r.hadley.nz/metaprogramming.html}{Metaprogramming +#' section} of \href{https://adv-r.hadley.nz}{Advanced R}. +#' +#' @md +#' @name tidyeval +#' @keywords internal +#' @importFrom rlang expr enquo enquos sym syms .data := as_name as_label +#' @aliases expr enquo enquos sym syms .data := as_name as_label +#' @export expr enquo enquos sym syms .data := as_name as_label +NULL diff --git a/R/wrapper.R b/R/wrapper.R new file mode 100644 index 0000000..03a2fb9 --- /dev/null +++ b/R/wrapper.R @@ -0,0 +1,814 @@ +#' @title Wrapper to run mash given a phenotype data frame +#' +#' @description Though step-by-step GWAS, preparation of mash inputs, and mash +#' allows you the most flexibility and opportunities to check your results +#' for errors, once those sanity checks are complete, this function allows +#' you to go from a phenotype data.frame of a few phenotypes you want to +#' compare to a mash result. Some exception handling has been built into +#' this function, but the user should stay cautious and skeptical of any +#' results that seem 'too good to be true'. +#' +#' @param df Dataframe containing phenotypes for mash where the first column is +#' 'sample.ID', which should match values in the snp$fam$sample.ID column. +#' @param snp A "bigSNP" object; load with \code{snp_attach()}. +#' @param type Character string, or a character vector the length of the number +#' of phenotypes. Type of univarate regression to run for GWAS. +#' Options are "linear" or "logistic". +#' @param svd A "big_SVD" object; Optional covariance matrix to use for +#' population structure correction. +#' @param suffix Optional character vector to give saved files a unique search string/name. +#' @param outputdir Optional file path to save output files. +#' @param min.phe Integer. Minimum number of individuals phenotyped in order to +#' include that phenotype in GWAS. Default is 200. Use lower values with +#' caution. +#' @param save.plots Logical. Should Manhattan and QQ-plots be generated and +#' saved to the working directory for univariate GWAS? Default is TRUE. +#' @param thr.r2 Value between 0 and 1. Threshold of r2 measure of linkage +#' disequilibrium. Markers in higher LD than this will be subset using clumping. +#' @param thr.m "sum" or "max". Type of threshold to use to clump values for +#' mash inputs. "sum" sums the -log10pvalues for each phenotype and uses +#' the maximum of this value as the threshold. "max" uses the maximum +#' -log10pvalue for each SNP across all of the univariate GWAS. +#' @param num.strong Integer. Number of SNPs used to derive data-driven covariance +#' matrix patterns, using markers with strong effects on phenotypes. +#' @param num.random Integer. Number of SNPs used to derive the correlation structure +#' of the null tests, and the mash fit on the null tests. +#' @param scale.phe Logical. Should effects for each phenotype be scaled to fall +#' between -1 and 1? Default is TRUE. +#' @param roll.size Integer. Used to create the svd for GWAS. +#' @param U.ed Mash data-driven covariance matrices. Specify these as a list or a path +#' to a file saved as an .rds. Creating these can be time-consuming, and +#' generating these once and reusing them for multiple mash runs can save time. +#' @param U.hyp Other covariance matrices for mash. Specify these as a list. These +#' matrices must have dimensions that match the number of phenotypes where +#' univariate GWAS ran successfully. +#' +#' @return A mash object made up of all phenotypes where univariate GWAS ran +#' successfully. +#' +#' @importFrom ashr get_fitted_g +#' @importFrom tibble tibble enframe add_row add_column rownames_to_column +#' @importFrom bigsnpr snp_autoSVD +#' @importFrom dplyr group_by summarise left_join select slice slice_max slice_sample mutate filter +#' @import bigstatsr +#' @import mashr +#' @importFrom cowplot save_plot +#' @importFrom tidyr replace_na +#' @importFrom matrixStats colMaxs rowMaxs +#' @importFrom stats predict +#' +#' @export +dive_phe2mash <- function(df, snp, type = "linear", svd = NULL, suffix = "", + outputdir = ".", + min.phe = 200, save.plots = TRUE, thr.r2 = 0.2, + thr.m = c("sum", "max"), num.strong = 1000, + num.random = NA, + scale.phe = TRUE, roll.size = 50, U.ed = NA, + U.hyp = NA){ + # 1. Stop if not functions. ---- + if (attr(snp, "class") != "bigSNP") { + stop("snp needs to be a bigSNP object, produced by the bigsnpr package.") + } + if (colnames(df)[1] != "sample.ID") { + stop("First column of phenotype dataframe (df) must be 'sample.ID'.") + } + if (length(type) > 1) { + if (length(type) != ncol(df) - 1) { + stop(paste0("Specify either one GWAS type (type = 'linear' or type = ", + "'logistic'), or one type for each phenotype in 'df'.")) + } + } else { + type <- rep(type, ncol(df) - 1) + } + + ## 1a. Generate useful values ---- + G <- snp$genotypes + nSNP_M <- round(snp$genotypes$ncol/1000000, digits = 1) + nSNP <- paste0(nSNP_M, "_M") + if (nSNP_M < 1) { + nSNP_K <- round(snp$genotypes$ncol/1000, digits = 1) + nSNP <- paste0(nSNP_K, "_K") + } + # nInd <- snp$genotypes$nrow + plants <- snp$fam$sample.ID + bonferroni <- -log10(0.05/length(snp$map$physical.pos)) + markers <- tibble(CHR = snp$map$chromosome, POS = snp$map$physical.pos, + marker.ID = snp$map$marker.ID) %>% + mutate(CHRN = as.numeric(as.factor(.data$CHR))) + + # 2. Pop Structure Correction ---- + if (is.null(svd)) { + message(paste0("Covariance matrix (svd) was not supplied - this will be", + " generated using snp_autoSVD().")) + svd <- snp_autoSVD(G = G, infos.chr = markers$CHRN, infos.pos = markers$POS, + k = 10, thr.r2 = thr.r2, roll.size = roll.size) + } else { + stopifnot(attr(svd, "class") == "big_SVD") + } + pc_max <- ncol(svd$u) + gwas_ok <- c() + + for (i in 2:ncol(df)) { + df1 <- df %>% + dplyr::select(.data$sample.ID, all_of(i)) + phename <- names(df1)[2] + df1 <- df1 %>% + group_by(.data$sample.ID) %>% + filter(!is.na(.data[[phename]])) %>% + summarise(phe = mean(.data[[phename]]), .groups = "drop_last") + df1 <- plants %>% + enframe(name = NULL, value = "sample.ID") %>% + mutate(sample.ID = as.character(.data$sample.ID)) %>% + left_join(df1, by = "sample.ID") + nPhe <- length(which(!is.na(df1[,2]))) + nLev <- nrow(unique(df1[which(!is.na(df1[,2])),2])) + + # Checks for correct combinations of phenotypes and GWAS types. + gwas_ok[i-1] <- check_gwas(df1 = df1, phename = phename, type = type[i-1], + nPhe = nPhe, minphe = min.phe, nLev = nLev) + + # Find best # PCs to correct for population structure for each phenotype. + if(gwas_ok[i-1]){ + lambdagc_df <- div_lambda_GC(df = df1, type = type[i-1], snp = snp, + svd = svd, npcs = c(0:pc_max)) + PC_df <- get_best_PC_df(lambdagc_df) + PC_df <- PC_df[1,] + + # 3. GWAS ---- + + # run gwas using best npcs from step 2 (best pop structure correction) + gwas <- div_gwas(df = df1, snp = snp, type = type[i - 1], svd = svd, + npcs = PC_df$NumPCs) + gwas <- gwas %>% mutate(pvalue = predict(gwas, log10 = FALSE), + log10p = -log10(.data$pvalue)) + gwas_data <- tibble(phe = phename, type = type[i - 1], + nsnp = nSNP, npcs = PC_df$NumPCs, nphe = nPhe, + nlev = nLev, lambda_GC = PC_df$lambda_GC, + bonferroni = bonferroni) + # plot Manhattan and QQ if save.plots == TRUE + if(save.plots == TRUE){ + qqplot <- get_qqplot(ps = gwas$pvalue, lambdaGC = TRUE) + manhattan <- get_manhattan(log10p = gwas$log10p, snp = snp, + thresh = bonferroni) # could round these too + plotname <- paste0(gwas_data$phe, "_", gwas_data$type, "_model_", + gwas_data$nphe, "g_", gwas_data$nsnp, "_SNPs_", + gwas_data$npcs, "_PCs.png") + save_plot(filename = file.path(outputdir, paste0("QQplot_", plotname)), + plot = qqplot, base_asp = 1, base_height = 4) + save_plot(filename = file.path(outputdir, paste0("Manhattan_", plotname)), + plot = manhattan, base_asp = 2.1, base_height = 3.5) + } + + # save gwas outputs together in a fbm + gwas <- gwas %>% + select(.data[["estim"]], .data[["std.err"]], .data[["log10p"]]) + + if(i == 2){ # save .bk and .rds file the first time through the loop. + if (!grepl("_$", suffix) & suffix != ""){ + suffix <- paste0("_", suffix) + } + fbm.name <- paste0(outputdir, "gwas_effects", suffix) + + colnames_fbm <- c(paste0(phename, "_Effect"), paste0(phename, "_SE"), + paste0(phename, "_log10p")) + as_FBM(gwas, backingfile = fbm.name)$save() + gwas2 <- big_attach(paste0(fbm.name, ".rds")) + gwas_metadata <- gwas_data + + } else { + colnames_fbm <- c(colnames_fbm, paste0(phename, "_Effect"), + paste0(phename, "_SE"), paste0(phename, "_log10p")) + gwas2$add_columns(ncol_add = 3) + gwas2[,c(i*3-5, i*3-4, i*3-3)] <- gwas + gwas2$save() + gwas_metadata <- add_row(gwas_metadata, phe = phename, type = type[i-1], + nsnp = nSNP, npcs = PC_df$NumPCs, nphe = nPhe, + nlev = nLev, lambda_GC = PC_df$lambda_GC, + bonferroni = bonferroni) + } + rm(gwas) + message(paste0("Finished phenotype ", i-1, ": ", names(df)[i])) + } + } + + # 4. mash input ---- + ## prioritize effects with max(log10p) or max(sum(log10p)) + ## make a random set of relatively unlinked SNPs + ind_estim <- (1:sum(gwas_ok))*3 - 2 + ind_se <- (1:sum(gwas_ok))*3 - 1 + ind_p <- (1:sum(gwas_ok))*3 + + if(thr.m == "sum"){ + thr_log10p <- big_apply(gwas2, + a.FUN = function(X, ind) rowSums(X[, ind]), + ind = ind_p, + a.combine = 'plus') + } else if(thr.m == "max"){ + log10pmax_f <- function(X, ind) rowMaxs(as.matrix(X[, ind])) + thr_log10p <- big_apply(gwas2, + a.FUN = log10pmax_f, + ind = ind_p, a.combine = 'c') + } + gwas2$add_columns(ncol_add = 1) + colnames_fbm <- c(colnames_fbm, paste0(thr.m, "_thr_log10p")) + gwas2[,(sum(gwas_ok)*3+1)] <- thr_log10p + gwas2$save() + + ## replace NA or Nan values + # Replace SE with 1's, estimates and p values with 0's. + replace_na_1 <- function(X, ind) replace_na(X[, ind], 1) + replace_na_0 <- function(X, ind) replace_na(X[, ind], 0) + gwas2[,ind_se] <- big_apply(gwas2, a.FUN = replace_na_1, ind = ind_se, + a.combine = 'plus') + gwas2[,ind_estim] <- big_apply(gwas2, a.FUN = replace_na_0, ind = ind_estim, + a.combine = 'plus') + gwas2[,ind_p] <- big_apply(gwas2, a.FUN = replace_na_0, ind = ind_p, + a.combine = 'plus') + gwas2[,(sum(gwas_ok)*3+1)] <- big_apply(gwas2, a.FUN = replace_na_0, + ind = (sum(gwas_ok)*3+1), + a.combine = 'plus') + gwas2$save() + + strong_clumps <- snp_clumping(G, infos.chr = markers$CHRN, thr.r2 = thr.r2, + infos.pos = markers$POS, S = thr_log10p) + random_clumps <- snp_clumping(G, infos.chr = markers$CHRN, thr.r2 = thr.r2, + infos.pos = markers$POS) + # this should be a top_n (slice_min/slice_max/slice_sample) with numSNPs, not a quantile + strong_sample <- add_column(markers, thr_log10p) %>% + rownames_to_column(var = "value") %>% + mutate(value = as.numeric(.data$value)) %>% + filter(.data$value %in% strong_clumps) %>% + slice_max(order_by = .data$thr_log10p, n = num.strong) %>% + arrange(.data$value) + + if (is.na(num.random)[1]) { + num.random <- num.strong*2 + } + random_sample <- add_column(markers, thr_log10p) %>% + rownames_to_column(var = "value") %>% + mutate(value = as.numeric(.data$value)) %>% + filter(!is.na(.data$thr_log10p)) %>% + filter(.data$value %in% random_clumps) %>% + slice_sample(n = num.random) %>% + arrange(.data$value) + + ## scaling + if (scale.phe == TRUE) { + colmaxes <- function(X, ind) colMaxs(abs(as.matrix(X[, ind]))) + scale.effects <- big_apply(gwas2, a.FUN = colmaxes, + ind = ind_estim, a.combine = 'c') + colstand <- function(X, ind, v) X[,ind] / v + for (j in seq_along(scale.effects)) { # standardize one gwas at a time. + gwas2[,c(ind_estim[j], ind_se[j])] <- + big_apply(gwas2, a.FUN = colstand, ind = c(ind_estim[j], ind_se[j]), + v = scale.effects[j], a.combine = 'plus') + } + gwas2$save() + gwas_metadata <- gwas_metadata %>% mutate(scaled = TRUE) + } else { + gwas_metadata <- gwas_metadata %>% mutate(scaled = FALSE) + } + + write_csv(tibble(colnames_fbm), file.path(outputdir, + paste0("gwas_effects", suffix, + "_column_names.csv"))) + write_csv(gwas_metadata, file.path(outputdir, + paste0("gwas_effects", suffix, + "_associated_metadata.csv"))) + ## make mash input data.frames (6x or more) + + Bhat_strong <- as.matrix(gwas2[strong_sample$value, ind_estim], ) + Shat_strong <- as.matrix(gwas2[strong_sample$value, ind_se]) + + Bhat_random <- as.matrix(gwas2[random_sample$value, ind_estim]) + Shat_random <- as.matrix(gwas2[random_sample$value, ind_se]) + + ## Full data: Both Bhat and Shat are zero (or near zero) for some input data. + ## Filter this data from the input, or set Shat to a positive number to + ## avoid numerical issues. which rowSums are 0, filter these out or make +. + ## Eventually want to batch process SNPs through this, not make a full set. + Bhat_full <- as.matrix(gwas2[, ind_estim]) + Shat_full <- as.matrix(gwas2[, ind_se]) + + ## name the columns for these conditions (usually the phenotype) + colnames(Bhat_strong) <- gwas_metadata$phe + colnames(Shat_strong) <- gwas_metadata$phe + colnames(Bhat_random) <- gwas_metadata$phe + colnames(Shat_random) <- gwas_metadata$phe + colnames(Bhat_full) <- gwas_metadata$phe + colnames(Shat_full) <- gwas_metadata$phe + + # 5. mash ---- + + data_r <- mashr::mash_set_data(Bhat_random, Shat_random) + message(paste0("Estimating the correlation structure in the null tests from ", + "the random data. + (not the strong data because it will not necessarily contain + any null tests).")) + Vhat <- mashr::estimate_null_correlation_simple(data = data_r) + + message(paste0("Setting up the main data objects with this correlation ", + "structure in place.")) + data_strong <- mashr::mash_set_data(Bhat_strong, Shat_strong, V = Vhat) + data_random <- mashr::mash_set_data(Bhat_random, Shat_random, V = Vhat) + data_full <- mashr::mash_set_data(Bhat_full, Shat_full, V = Vhat) + U_c <- mashr::cov_canonical(data_random) + + if (!is.na(U.ed[1])) { + message(paste0("Now estimating data-driven covariances using the strong", + " tests. + NB: This step may take some time to complete.")) + if (length(ind_p) < 6) { + cov_npc <- ind_p - 1 + } else { + cov_npc <- 5 + } + U_pca = mashr::cov_pca(data_strong, npc = cov_npc) + U_ed = mashr::cov_ed(data_strong, U_pca) + saveRDS(U_ed, file = paste0(outputdir, "Mash_U_ed", suffix, ".rds")) + } else if (typeof(U.ed) == "list") { + U_ed <- U.ed + } else if (typeof(U.ed) == "character") { + U_ed <- readRDS(file = U.ed) + } else { + stop("U.ed should be NA, a list created using 'mashr::cov_ed', ", + "or a file path of a U_ed saved as an .rds") + } + + if (typeof(U.hyp) == "list") { + m = mashr::mash(data_random, Ulist = c(U_ed, U_c, U.hyp), outputlevel = 1) + } else { + m = mashr::mash(data_random, Ulist = c(U_ed, U_c), outputlevel = 1) + } + message(paste0("Compute posterior matrices for all effects", + " using the mash fit from the + random tests.")) + m2 = mashr::mash(data_full, g = ashr::get_fitted_g(m), fixg = TRUE) + + return(m2) +} + + + + +#' Wrapper for bigsnpr for GWAS +#' +#' @description Given a dataframe of phenotypes associated with sample.IDs, this +#' function is a wrapper around bigsnpr functions to conduct linear or +#' logistic regression on wheat. The main advantages of this +#' function over just using the bigsnpr functions is that it automatically +#' removes individual genotypes with missing phenotypic data +#' and that it can run GWAS on multiple phenotypes sequentially. +#' +#' @param df Dataframe of phenotypes where the first column is sample.ID +#' @param type Character string. Type of univarate regression to run for GWAS. +#' Options are "linear" or "logistic". +#' @param snp Genomic information to include for wheat. +#' @param svd Optional covariance matrix to include in the regression. You +#' can generate these using \code{bigsnpr::snp_autoSVD()}. +#' @param npcs Integer. Number of PCs to use for population structure correction. +#' +#' @import bigsnpr +#' @import bigstatsr +#' @importFrom dplyr mutate rename case_when +#' @importFrom purrr as_vector +#' @importFrom tibble as_tibble enframe +#' @importFrom rlang .data +#' +#' @return The gwas results for the last phenotype in the dataframe. That +#' phenotype, as well as the remaining phenotypes, are saved as RDS objects +#' in the working directory. +#' +#' @export +div_gwas <- function(df, snp, type, svd, npcs){ + stopifnot(type %in% c("linear", "logistic")) + if(attr(snp, "class") != "bigSNP"){ + stop("snp needs to be a bigSNP object, produced by the bigsnpr package.") + } + if(colnames(df)[1] != "sample.ID"){ + stop("First column of phenotype dataframe (df) must be 'sample.ID'.") + } + G <- snp$genotypes + pc_max = ncol(svd$u) + + for(i in seq_along(names(df))[-1]){ + y1 <- as_vector(df[which(!is.na(df[,i])), i]) + ind_y <- which(!is.na(df[,i])) + + if(type == "linear"){ + if(npcs > 0){ + ind_u <- matrix(svd$u[which(!is.na(df[,i])),1:npcs], ncol = npcs) + gwaspc <- big_univLinReg(G, y.train = y1, covar.train = ind_u, + ind.train = ind_y, ncores = 1) + } else { + gwaspc <- big_univLinReg(G, y.train = y1, ind.train = ind_y, + ncores = 1) + } + } else if(type == "logistic"){ + message(paste0("For logistic models, if convergence is not reached by ", + "the main algorithm for any SNP, the corresponding `niter` element ", + "is set to NA, and glm is used instead. If glm can't ", + "converge either, those SNP estimations are set to NA.")) + if(npcs > 0){ + ind_u <- matrix(svd$u[which(!is.na(df[,i])),1:npcs], ncol = npcs) + gwaspc <- suppressMessages(big_univLogReg(G, y01.train = y1, + covar.train = ind_u, + ind.train = ind_y, + ncores = 1)) + } else { + gwaspc <- suppressMessages(big_univLogReg(G, y01.train = y1, + ind.train = ind_y, + ncores = 1)) + } + } else { + stop(paste0("Type of GWAS not recognized: please choose one of 'linear'", + " or 'logistic'")) + } + + } + return(gwaspc) +} + + +#' Create a quantile-quantile plot with ggplot2. +#' +#' @description Assumptions for this quantile quantile plot: +#' Expected P values are uniformly distributed. +#' Confidence intervals assume independence between tests. +#' We expect deviations past the confidence intervals if the tests are +#' not independent. +#' For example, in a genome-wide association study, the genotype at any +#' position is correlated to nearby positions. Tests of nearby genotypes +#' will result in similar test statistics. +#' +#' @param ps Numeric vector of p-values. +#' @param ci Numeric. Size of the confidence interval, 0.95 by default. +#' @param lambdaGC Logical. Add the Genomic Control coefficient as subtitle to +#' the plot? +#' +#' @import ggplot2 +#' @importFrom tibble as_tibble +#' @importFrom rlang .data +#' @importFrom stats qbeta ppoints +#' @param tol Numeric. Tolerance for optional Genomic Control coefficient. +#' +#' @return A ggplot2 plot. +#' +#' @export +get_qqplot <- function(ps, ci = 0.95, lambdaGC = FALSE, tol = 1e-8) { + ps <- ps[which(!is.na(ps))] + n <- length(ps) + df <- data.frame( + observed = -log10(sort(ps)), + expected = -log10(ppoints(n)), + clower = -log10(qbeta(p = (1 - ci) / 2, shape1 = 1:n, shape2 = n:1)), + cupper = -log10(qbeta(p = (1 + ci) / 2, shape1 = 1:n, shape2 = n:1)) + ) + df_round <- round_xy(df$expected, df$observed, cl = df$clower, cu = df$cupper) + log10Pe <- expression(paste("Expected -log"[10], plain("("), italic(p-value), + plain(")"))) + log10Po <- expression(paste("Observed -log"[10], plain("("), italic(p-value), + plain(")"))) + p1 <- ggplot(as_tibble(df_round)) + + geom_point(aes(.data$expected, .data$observed), shape = 1, size = 1) + + geom_abline(intercept = 0, slope = 1, size = 1.5, color = "red") + + geom_line(aes(.data$expected, .data$cupper), linetype = 2) + + geom_line(aes(.data$expected, .data$clower), linetype = 2) + + xlab(log10Pe) + + ylab(log10Po) + + theme_classic() + + theme(axis.title = element_text(size = 10), + axis.text = element_text(size = 10), + axis.line.x = element_line(size = 0.35, colour = 'grey50'), + axis.line.y = element_line(size = 0.35, colour = 'grey50'), + axis.ticks = element_line(size = 0.25, colour = 'grey50'), + legend.justification = c(1, 0.75), legend.position = c(1, 0.9), + legend.key.size = unit(0.35, 'cm'), + legend.title = element_blank(), + legend.text = element_text(size = 9), + legend.text.align = 0, legend.background = element_blank(), + plot.subtitle = element_text(size = 10, vjust = 0), + strip.background = element_blank(), + strip.text = element_text(hjust = 0.5, size = 10 ,vjust = 0), + strip.placement = 'outside', panel.spacing.x = unit(-0.4, 'cm')) + + if (lambdaGC) { + lamGC <- get_lambdagc(ps = ps, tol = tol) + expr <- substitute(expression(lambda[GC] == l), list(l = lamGC)) + p1 + labs(subtitle = eval(expr)) + } else { + p1 + } +} + + +#' Return a number rounded to some number of digits +#' +#' @description Given some x, return the number rounded to some number of +#' digits. +#' +#' @param x A number or vector of numbers +#' @param at Numeric. Rounding factor or size of the bin to round to. +#' +#' @return A number or vector of numbers +round2 <- function(x, at) ceiling(x / at) * at + +#' Return a dataframe binned into 2-d bins by some x and y. +#' +#' @description Given a dataframe of x and y values (with some optional +#' confidence intervals surrounding the y values), return only the unique +#' values of x and y in some set of 2-d bins. +#' +#' @param x Numeric vector. The first vector for binning. +#' @param y Numeric vector. the second vector for binning +#' @param cl Numeric vector. Optional confidence interval for the y vector, +#' lower bound. +#' @param cu Numeric vector. Optional confidence interval for the y vector, +#' upper bound. +#' @param roundby Numeric. The amount to round the x and y vectors by for 2d +#' binning. +#' +#' @return A dataframe containing the 2-d binned values for x and y, and their +#' confidence intervals. +round_xy <- function(x, y, cl = NA, cu = NA, roundby = 0.001){ + expected <- round2(x, at = roundby) + observed <- round2(y, at = roundby) + if(!is.na(cl[1]) & !is.na(cu[1])){ + clower <- round2(cl, at = roundby) + cupper <- round2(cu, at = roundby) + tp <- cbind(expected, observed, clower, cupper) + return(tp[!duplicated(tp),]) + } else { + tp <- cbind(expected, observed) + return(tp[!duplicated(tp),]) + } +} + +get_manhattan <- function(log10p, snp, thresh){ + plot_data <- tibble(CHR = snp$map$chromosome, POS = snp$map$physical.pos, + marker.ID = snp$map$marker.ID, log10p = log10p) %>% + mutate(observed = round2(.data$log10p, at = 0.001)) %>% + group_by(.data$CHR, .data$POS, .data$observed) %>% + slice(1) + + nchr <- length(unique(plot_data$CHR)) + + p1 <- plot_data %>% + ggplot(aes(x = .data$POS, y = .data$log10p)) + + geom_point(aes(color = .data$CHR, fill = .data$CHR)) + + geom_hline(yintercept = thresh, color = "black", linetype = 2, + size = 1) + + facet_wrap(~ .data$CHR, nrow = 1, scales = "free_x", + strip.position = "bottom") + + scale_color_manual(values = rep(c("#1B0C42FF", "#48347dFF", + "#95919eFF"), ceiling(nchr/3)), + guide = FALSE) + + theme_classic() + + theme(axis.text.x = element_blank(), + axis.ticks.x = element_blank(), + panel.background = element_rect(fill=NA), + legend.position = "none", + axis.title = element_text(size = 10), + axis.text = element_text(size = 10), + axis.line.x = element_line(size = 0.35, colour = 'grey50'), + axis.line.y = element_line(size = 0.35, colour = 'grey50'), + axis.ticks = element_line(size = 0.25, colour = 'grey50'), + legend.justification = c(1, 0.75), + legend.key.size = unit(0.35, 'cm'), + legend.title = element_blank(), + legend.text = element_text(size = 9), + legend.text.align = 0, legend.background = element_blank(), + plot.subtitle = element_text(size = 10, vjust = 0), + strip.background = element_blank(), + strip.text = element_text(hjust = 0.5, size = 10 ,vjust = 0), + strip.placement = 'outside', panel.spacing.x = unit(-0.1, 'cm')) + + labs(x = "Chromosome", y = "-log10(p value)") + + scale_x_continuous(expand = c(0.2, 0.2)) + return(p1) +} + +#' Return lambda_GC for different numbers of PCs for GWAS on Panicum virgatum. +#' +#' @description Given a dataframe of phenotypes associated with sample.IDs and +#' output from a PCA to control for population structure, this function will +#' return a .csv file of the lambda_GC values for the GWAS upon inclusion +#' of different numbers of PCs. This allows the user to choose a number of +#' PCs that returns a lambda_GC close to 1, and thus ensure that they have +#' done adequate correction for population structure. +#' +#' @param df Dataframe of phenotypes where the first column is sample.ID and each +#' sample.ID occurs only once in the dataframe. +#' @param type Character string. Type of univarate regression to run for GWAS. +#' Options are "linear" or "logistic". +#' @param snp A bigSNP object with sample.IDs that match the df. +#' @param svd big_SVD object; Covariance matrix to include in the regression. +#' Generate these using \code{bigsnpr::snp_autoSVD()}. +#' @param ncores Number of cores to use. Default is one. +#' @param npcs Integer vector of principle components to use. +#' Defaults to c(0:10). +#' @param saveoutput Logical. Should output be saved as a csv to the +#' working directory? +#' +#' @import bigsnpr +#' @import bigstatsr +#' @importFrom dplyr mutate rename case_when mutate_if +#' @importFrom purrr as_vector +#' @importFrom tibble as_tibble enframe +#' @importFrom rlang .data +#' @importFrom readr write_csv +#' @importFrom utils tail +#' +#' @return A dataframe containing the lambda_GC values for each number of PCs +#' specified. This is also saved as a .csv file in the working directory. +#' +#' @export +div_lambda_GC <- function(df, type = c("linear", "logistic"), snp, + svd = NA, ncores = 1, npcs = c(0:10), + saveoutput = FALSE){ + if(attr(snp, "class") != "bigSNP"){ + stop("snp needs to be a bigSNP object, produced by the bigsnpr package.") + } + if(colnames(df)[1] != "sample.ID"){ + stop("First column of phenotype dataframe (df) must be 'sample.ID'.") + } + if(length(svd) == 1){ + stop(paste0("Need to specify covariance matrix (svd) and a vector of", + " PC #'s to test (npcs).")) + } + + + G <- snp$genotypes + + LambdaGC <- as_tibble(matrix(data = + c(npcs, rep(NA, (ncol(df) - 1)*length(npcs))), + nrow = length(npcs), ncol = ncol(df), + dimnames = list(npcs, colnames(df)))) + LambdaGC <- LambdaGC %>% + dplyr::rename("NumPCs" = .data$sample.ID) %>% + mutate_if(is.integer, as.numeric) + + for (i in seq_along(names(df))[-1]) { + + for (k in c(1:length(npcs))) { + + if (type == "linear") { + + y1 <- as_vector(df[which(!is.na(df[,i])), i]) + ind_y <- which(!is.na(df[,i])) + + if (npcs[k] == 0) { + + gwaspc <- big_univLinReg(G, y.train = y1, ind.train = ind_y, + ncores = ncores) + } else { + + ind_u <- matrix(svd$u[which(!is.na(df[,i])),1:npcs[k]], + ncol = npcs[k]) + gwaspc <- big_univLinReg(G, y.train = y1, covar.train = ind_u, + ind.train = ind_y, ncores = ncores) + } + } else if(type == "logistic"){ + message(paste0("For logistic models, if convergence is not reached by ", + "the main algorithm for some SNPs, the corresponding `niter` element ", + "is set to NA, and glm is used instead. If glm can't ", + "converge either, those SNP estimations are set to NA.")) + y1 <- as_vector(df[which(!is.na(df[,i])), i]) + ind_y <- which(!is.na(df[,i])) + if(npcs[k] == 0){ + gwaspc <- suppressMessages(big_univLogReg(G, y01.train = y1, + ind.train = ind_y, + ncores = ncores)) + } else { + ind_u <- matrix(svd$u[which(!is.na(df[,i])),1:npcs[k]], + ncol = npcs[k]) + gwaspc <- suppressMessages(big_univLogReg(G, y01.train = y1, + covar.train = ind_u, + ind.train = ind_y, + ncores = ncores)) + } + } + ps <- predict(gwaspc, log10 = FALSE) + LambdaGC[k,i] <- get_lambdagc(ps = ps) + #message(paste0("Finished Lambda_GC calculation for ", names(df)[i], + # " using ", npcs[k], " PCs.")) + } + + if(saveoutput == TRUE){ + write_csv(LambdaGC, path = paste0("Lambda_GC_", names(df)[i], ".csv")) + } + #message(paste0("Finished phenotype ", i-1, ": ", names(df)[i])) + } + if(saveoutput == TRUE){ + write_csv(LambdaGC, path = paste0("Lambda_GC_", names(df)[2], "_to_", + tail(names(df), n = 1), "_Phenotypes_", + npcs[1], "_to_", tail(npcs, n = 1), + "_PCs.csv")) + best_LambdaGC <- get_best_PC_df(df = LambdaGC) + write_csv(best_LambdaGC, path = paste0("Best_Lambda_GC_", names(df)[2], + "_to_", tail(names(df), n = 1), + "_Phenotypes_", npcs[1], "_to_", + tail(npcs, n = 1), "_PCs.csv")) + } + return(LambdaGC) +} + +#' Find lambda_GC value for non-NA p-values +#' +#' @description Finds the lambda GC value for some vector of p-values. +#' +#' @param ps Numeric vector of p-values. Can have NA's. +#' @param tol Numeric. Tolerance for optional Genomic Control coefficient. +#' +#' @importFrom stats median uniroot +#' +#' @return A lambda GC value (some positive number, ideally ~1) +#' +#' @export +get_lambdagc <- function(ps, tol = 1e-8){ + ps <- ps[which(!is.na(ps))] + xtr <- log10(ps) + MEDIAN <- log10(0.5) + f.opt <- function(x) (x - MEDIAN) + xtr_p <- median(xtr) / uniroot(f.opt, interval = range(xtr), + check.conv = TRUE, + tol = tol)$root + lamGC <- signif(xtr_p) + return(lamGC) +} + + +#' Return best number of PCs in terms of lambda_GC for Panicum virgatum. +#' Return best number of PCs in terms of lambda_GC for the CDBN. +#' +#' @description Given a dataframe created using pvdiv_lambda_GC, this function +#' returns the first lambda_GC less than 1.05, or the smallest lambda_GC, +#' for each column in the dataframe. +#' +#' @param df Dataframe of phenotypes where the first column is NumPCs and +#' subsequent column contains lambda_GC values for some phenotype. +#' +#' @importFrom dplyr filter top_n select full_join arrange +#' @importFrom tidyr gather +#' @importFrom rlang .data sym !! +#' @importFrom tidyselect all_of +#' +#' @return A dataframe containing the best lambda_GC value and number of PCs +#' for each phenotype in the data frame. +get_best_PC_df <- function(df){ + column <- names(df)[ncol(df)] + bestPCs <- df %>% + filter(!! sym(column) < 1.05| !! sym(column) == min(!! sym(column))) %>% + top_n(n = -1, wt = .data$NumPCs) %>% + select(.data$NumPCs, all_of(column)) + + if(ncol(df) > 2){ + for(i in c((ncol(df)-2):1)){ + column <- names(df)[i+1] + + bestPCs <- df %>% + filter(!! sym(column) < 1.05 | !! sym(column) == min(!! sym(column))) %>% + top_n(n = -1, wt = .data$NumPCs) %>% + select(.data$NumPCs, all_of(column)) %>% + full_join(bestPCs, by = c("NumPCs", (column))) + } + } + + bestPCdf <- bestPCs %>% + arrange(.data$NumPCs) %>% + gather(key = "trait", value = "lambda_GC", 2:ncol(bestPCs)) %>% + filter(!is.na(.data$lambda_GC)) + + return(bestPCdf) +} + + +div_mash <- function(){} + + +check_gwas <- function(df1, phename, type, nPhe, minphe, nLev){ + if(nPhe < minphe){ + message(paste0("The phenotype ", phename, " does not have the minimum ", + "number of phenotyped sample.ID's, (", minphe, ") and so ", + "will not be used for GWAS.")) + gwas_ok <- FALSE + } else if(nLev < 2){ + message(paste0("The phenotype ", phename, " does not have two or more ", + "distinct non-NA values and will not be used for GWAS.")) + gwas_ok <- FALSE + } else if(nLev > 2 & type == "logistic"){ + message(paste0("The phenotype ", phename, " has more than two distinct ", + "non-NA values and will not be used for GWAS with 'type=", + "logistic'.")) + gwas_ok <- FALSE + } else if(!(unique(df1[which(!is.na(df1[,2])),2])[1,1] %in% c(0,1)) & + !(unique(df1[which(!is.na(df1[,2])),2])[2,1] %in% c(0,1)) & + type == "logistic"){ + message(paste0("The phenotype ", phename, " has non-NA values that are ", + "not 0 or 1 and will not be used for GWAS with 'type=", + "logistic'.")) + gwas_ok <- FALSE + } else { + gwas_ok <- TRUE + } + return(gwas_ok) +} diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 0000000..238c6b7 --- /dev/null +++ b/_pkgdown.yml @@ -0,0 +1 @@ +https://github.com/Alice-MacQueen/snpdiver diff --git a/man/div_gwas.Rd b/man/div_gwas.Rd new file mode 100644 index 0000000..6da3563 --- /dev/null +++ b/man/div_gwas.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wrapper.R +\name{div_gwas} +\alias{div_gwas} +\title{Wrapper for bigsnpr for GWAS} +\usage{ +div_gwas(df, snp, type, svd, npcs) +} +\arguments{ +\item{df}{Dataframe of phenotypes where the first column is sample.ID} + +\item{snp}{Genomic information to include for wheat.} + +\item{type}{Character string. Type of univarate regression to run for GWAS. +Options are "linear" or "logistic".} + +\item{svd}{Optional covariance matrix to include in the regression. You +can generate these using \code{bigsnpr::snp_autoSVD()}.} + +\item{npcs}{Integer. Number of PCs to use for population structure correction.} +} +\value{ +The gwas results for the last phenotype in the dataframe. That +phenotype, as well as the remaining phenotypes, are saved as RDS objects +in the working directory. +} +\description{ +Given a dataframe of phenotypes associated with sample.IDs, this +function is a wrapper around bigsnpr functions to conduct linear or +logistic regression on wheat. The main advantages of this +function over just using the bigsnpr functions is that it automatically +removes individual genotypes with missing phenotypic data +and that it can run GWAS on multiple phenotypes sequentially. +} diff --git a/man/div_lambda_GC.Rd b/man/div_lambda_GC.Rd new file mode 100644 index 0000000..e545929 --- /dev/null +++ b/man/div_lambda_GC.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wrapper.R +\name{div_lambda_GC} +\alias{div_lambda_GC} +\title{Return lambda_GC for different numbers of PCs for GWAS on Panicum virgatum.} +\usage{ +div_lambda_GC( + df, + type = c("linear", "logistic"), + snp, + svd = NA, + ncores = 1, + npcs = c(0:10), + saveoutput = FALSE +) +} +\arguments{ +\item{df}{Dataframe of phenotypes where the first column is sample.ID and each +sample.ID occurs only once in the dataframe.} + +\item{type}{Character string. Type of univarate regression to run for GWAS. +Options are "linear" or "logistic".} + +\item{snp}{A bigSNP object with sample.IDs that match the df.} + +\item{svd}{big_SVD object; Covariance matrix to include in the regression. +Generate these using \code{bigsnpr::snp_autoSVD()}.} + +\item{ncores}{Number of cores to use. Default is one.} + +\item{npcs}{Integer vector of principle components to use. +Defaults to c(0:10).} + +\item{saveoutput}{Logical. Should output be saved as a csv to the +working directory?} +} +\value{ +A dataframe containing the lambda_GC values for each number of PCs +specified. This is also saved as a .csv file in the working directory. +} +\description{ +Given a dataframe of phenotypes associated with sample.IDs and +output from a PCA to control for population structure, this function will +return a .csv file of the lambda_GC values for the GWAS upon inclusion +of different numbers of PCs. This allows the user to choose a number of +PCs that returns a lambda_GC close to 1, and thus ensure that they have +done adequate correction for population structure. +} diff --git a/man/dive_phe2mash.Rd b/man/dive_phe2mash.Rd new file mode 100644 index 0000000..cfe1422 --- /dev/null +++ b/man/dive_phe2mash.Rd @@ -0,0 +1,89 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wrapper.R +\name{dive_phe2mash} +\alias{dive_phe2mash} +\title{Wrapper to run mash given a phenotype data frame} +\usage{ +dive_phe2mash( + df, + snp, + type = "linear", + svd = NULL, + suffix = "", + outputdir = ".", + min.phe = 200, + save.plots = TRUE, + thr.r2 = 0.2, + thr.m = c("sum", "max"), + num.strong = 1000, + num.random = NA, + scale.phe = TRUE, + roll.size = 50, + U.ed = NA, + U.hyp = NA +) +} +\arguments{ +\item{df}{Dataframe containing phenotypes for mash where the first column is +'sample.ID', which should match values in the snp$fam$sample.ID column.} + +\item{snp}{A "bigSNP" object; load with \code{snp_attach()}.} + +\item{type}{Character string, or a character vector the length of the number +of phenotypes. Type of univarate regression to run for GWAS. +Options are "linear" or "logistic".} + +\item{svd}{A "big_SVD" object; Optional covariance matrix to use for +population structure correction.} + +\item{suffix}{Optional character vector to give saved files a unique search string/name.} + +\item{outputdir}{Optional file path to save output files.} + +\item{min.phe}{Integer. Minimum number of individuals phenotyped in order to +include that phenotype in GWAS. Default is 200. Use lower values with +caution.} + +\item{save.plots}{Logical. Should Manhattan and QQ-plots be generated and +saved to the working directory for univariate GWAS? Default is TRUE.} + +\item{thr.r2}{Value between 0 and 1. Threshold of r2 measure of linkage +disequilibrium. Markers in higher LD than this will be subset using clumping.} + +\item{thr.m}{"sum" or "max". Type of threshold to use to clump values for +mash inputs. "sum" sums the -log10pvalues for each phenotype and uses +the maximum of this value as the threshold. "max" uses the maximum +-log10pvalue for each SNP across all of the univariate GWAS.} + +\item{num.strong}{Integer. Number of SNPs used to derive data-driven covariance +matrix patterns, using markers with strong effects on phenotypes.} + +\item{num.random}{Integer. Number of SNPs used to derive the correlation structure +of the null tests, and the mash fit on the null tests.} + +\item{scale.phe}{Logical. Should effects for each phenotype be scaled to fall +between -1 and 1? Default is TRUE.} + +\item{roll.size}{Integer. Used to create the svd for GWAS.} + +\item{U.ed}{Mash data-driven covariance matrices. Specify these as a list or a path +to a file saved as an .rds. Creating these can be time-consuming, and +generating these once and reusing them for multiple mash runs can save time.} + +\item{U.hyp}{Other covariance matrices for mash. Specify these as a list. These +matrices must have dimensions that match the number of phenotypes where +univariate GWAS ran successfully.} +} +\value{ +A mash object made up of all phenotypes where univariate GWAS ran +successfully. +} +\description{ +Though step-by-step GWAS, preparation of mash inputs, and mash +allows you the most flexibility and opportunities to check your results +for errors, once those sanity checks are complete, this function allows +you to go from a phenotype data.frame of a few phenotypes you want to +compare to a mash result. Some exception handling has been built into +this function, but the user should stay cautious and skeptical of any +results that seem 'too good to be true'. +} diff --git a/man/get_best_PC_df.Rd b/man/get_best_PC_df.Rd new file mode 100644 index 0000000..a0623fd --- /dev/null +++ b/man/get_best_PC_df.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wrapper.R +\name{get_best_PC_df} +\alias{get_best_PC_df} +\title{Return best number of PCs in terms of lambda_GC for Panicum virgatum. +Return best number of PCs in terms of lambda_GC for the CDBN.} +\usage{ +get_best_PC_df(df) +} +\arguments{ +\item{df}{Dataframe of phenotypes where the first column is NumPCs and +subsequent column contains lambda_GC values for some phenotype.} +} +\value{ +A dataframe containing the best lambda_GC value and number of PCs +for each phenotype in the data frame. +} +\description{ +Given a dataframe created using pvdiv_lambda_GC, this function +returns the first lambda_GC less than 1.05, or the smallest lambda_GC, +for each column in the dataframe. +} diff --git a/man/get_lambdagc.Rd b/man/get_lambdagc.Rd new file mode 100644 index 0000000..0ec6e11 --- /dev/null +++ b/man/get_lambdagc.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wrapper.R +\name{get_lambdagc} +\alias{get_lambdagc} +\title{Find lambda_GC value for non-NA p-values} +\usage{ +get_lambdagc(ps, tol = 1e-08) +} +\arguments{ +\item{ps}{Numeric vector of p-values. Can have NA's.} + +\item{tol}{Numeric. Tolerance for optional Genomic Control coefficient.} +} +\value{ +A lambda GC value (some positive number, ideally ~1) +} +\description{ +Finds the lambda GC value for some vector of p-values. +} diff --git a/man/get_qqplot.Rd b/man/get_qqplot.Rd new file mode 100644 index 0000000..0254e68 --- /dev/null +++ b/man/get_qqplot.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wrapper.R +\name{get_qqplot} +\alias{get_qqplot} +\title{Create a quantile-quantile plot with ggplot2.} +\usage{ +get_qqplot(ps, ci = 0.95, lambdaGC = FALSE, tol = 1e-08) +} +\arguments{ +\item{ps}{Numeric vector of p-values.} + +\item{ci}{Numeric. Size of the confidence interval, 0.95 by default.} + +\item{lambdaGC}{Logical. Add the Genomic Control coefficient as subtitle to +the plot?} + +\item{tol}{Numeric. Tolerance for optional Genomic Control coefficient.} +} +\value{ +A ggplot2 plot. +} +\description{ +Assumptions for this quantile quantile plot: +Expected P values are uniformly distributed. +Confidence intervals assume independence between tests. +We expect deviations past the confidence intervals if the tests are +not independent. +For example, in a genome-wide association study, the genotype at any +position is correlated to nearby positions. Tests of nearby genotypes +will result in similar test statistics. +} diff --git a/man/pipe.Rd b/man/pipe.Rd new file mode 100644 index 0000000..0eec752 --- /dev/null +++ b/man/pipe.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-pipe.R +\name{\%>\%} +\alias{\%>\%} +\title{Pipe operator} +\usage{ +lhs \%>\% rhs +} +\description{ +See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. +} +\keyword{internal} diff --git a/man/round2.Rd b/man/round2.Rd new file mode 100644 index 0000000..f70190a --- /dev/null +++ b/man/round2.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wrapper.R +\name{round2} +\alias{round2} +\title{Return a number rounded to some number of digits} +\usage{ +round2(x, at) +} +\arguments{ +\item{x}{A number or vector of numbers} + +\item{at}{Numeric. Rounding factor or size of the bin to round to.} +} +\value{ +A number or vector of numbers +} +\description{ +Given some x, return the number rounded to some number of +digits. +} diff --git a/man/round_xy.Rd b/man/round_xy.Rd new file mode 100644 index 0000000..6c89858 --- /dev/null +++ b/man/round_xy.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wrapper.R +\name{round_xy} +\alias{round_xy} +\title{Return a dataframe binned into 2-d bins by some x and y.} +\usage{ +round_xy(x, y, cl = NA, cu = NA, roundby = 0.001) +} +\arguments{ +\item{x}{Numeric vector. The first vector for binning.} + +\item{y}{Numeric vector. the second vector for binning} + +\item{cl}{Numeric vector. Optional confidence interval for the y vector, +lower bound.} + +\item{cu}{Numeric vector. Optional confidence interval for the y vector, +upper bound.} + +\item{roundby}{Numeric. The amount to round the x and y vectors by for 2d +binning.} +} +\value{ +A dataframe containing the 2-d binned values for x and y, and their +confidence intervals. +} +\description{ +Given a dataframe of x and y values (with some optional +confidence intervals surrounding the y values), return only the unique +values of x and y in some set of 2-d bins. +} diff --git a/man/tidyeval.Rd b/man/tidyeval.Rd new file mode 100644 index 0000000..ebcd89a --- /dev/null +++ b/man/tidyeval.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-tidy-eval.R +\name{tidyeval} +\alias{tidyeval} +\alias{expr} +\alias{enquo} +\alias{enquos} +\alias{sym} +\alias{syms} +\alias{.data} +\alias{:=} +\alias{as_name} +\alias{as_label} +\title{Tidy eval helpers} +\description{ +\itemize{ +\item \code{\link[rlang]{sym}()} creates a symbol from a string and +\code{\link[rlang:sym]{syms}()} creates a list of symbols from a +character vector. +\item \code{\link[rlang:nse-defuse]{enquo}()} and +\code{\link[rlang:nse-defuse]{enquos}()} delay the execution of one or +several function arguments. \code{enquo()} returns a single quoted +expression, which is like a blueprint for the delayed computation. +\code{enquos()} returns a list of such quoted expressions. +\item \code{\link[rlang:nse-defuse]{expr}()} quotes a new expression \emph{locally}. It +is mostly useful to build new expressions around arguments +captured with \code{\link[=enquo]{enquo()}} or \code{\link[=enquos]{enquos()}}: +\code{expr(mean(!!enquo(arg), na.rm = TRUE))}. +\item \code{\link[rlang]{as_name}()} transforms a quoted variable name +into a string. Supplying something else than a quoted variable +name is an error. + +That's unlike \code{\link[rlang]{as_label}()} which also returns +a single string but supports any kind of R object as input, +including quoted function calls and vectors. Its purpose is to +summarise that object into a single label. That label is often +suitable as a default name. + +If you don't know what a quoted expression contains (for instance +expressions captured with \code{enquo()} could be a variable +name, a call to a function, or an unquoted constant), then use +\code{as_label()}. If you know you have quoted a simple variable +name, or would like to enforce this, use \code{as_name()}. +} + +To learn more about tidy eval and how to use these tools, visit +\url{https://tidyeval.tidyverse.org} and the +\href{https://adv-r.hadley.nz/metaprogramming.html}{Metaprogramming +section} of \href{https://adv-r.hadley.nz}{Advanced R}. +} +\keyword{internal} diff --git a/snpdiver.Rproj b/snpdiver.Rproj new file mode 100644 index 0000000..69fafd4 --- /dev/null +++ b/snpdiver.Rproj @@ -0,0 +1,22 @@ +Version: 1.0 + +RestoreWorkspace: No +SaveWorkspace: No +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +AutoAppendNewline: Yes +StripTrailingWhitespace: Yes +LineEndingConversion: Posix + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace