diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 00000000..0786f583 --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,3 @@ +^README.md$ +^.gitignore$ +^.git/.*$ diff --git a/.gitignore b/.gitignore index e9743491..e193b3b1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,13 +1,27 @@ -# History # -########### -.history - -# OS generated files # -###################### -.DS_Store -.DS_Store? -._* -.Spotlight-V100 -.Trashes -ehthumbs.db -Thumbs.db \ No newline at end of file +/cfg.R +# History files +.Rhistory +.Rapp +*.history + +# Example code in package build process +*-Ex.R + +# RStudio files +*.RData +.Rproj.user/ +.Rproj +.Rproj.user +*.Rdata +*.Rproj + +# TRONCO Output +*.pdf +*.tar.gz +*.tex +*.mat +*.zip +*.png + +# Configuration file +cfg.R diff --git a/COPYING.rtf b/COPYING.rtf deleted file mode 100644 index c7eb980c..00000000 --- a/COPYING.rtf +++ /dev/null @@ -1,78 +0,0 @@ -{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf210 -{\fonttbl\f0\froman\fcharset0 Times-Roman;} -{\colortbl;\red255\green255\blue255;} -\paperw11900\paperh16840\margl1440\margr1440\vieww18200\viewh10700\viewkind0 -\deftab720 -\pard\pardeftab720\sa398 - -\f0\b\fs48 \cf0 Eclipse Public License - v 1.0\ -\pard\pardeftab720\sa160 - -\b0\fs32 \cf0 THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.\ -\pard\pardeftab720\sa160 - -\b \cf0 1. DEFINITIONS -\b0 \ -"Contribution" means:\ -\pard\pardeftab720\li960\sa16 -\cf0 a) in the case of the initial Contributor, the initial code and documentation distributed under this Agreement, and\ -b) in the case of each subsequent Contributor:\ -i) changes to the Program, and\ -ii) additions to the Program;\ -where such changes and/or additions to the Program originate from and are distributed by that particular Contributor. A Contribution 'originates' from a Contributor if it was added to the Program by such Contributor itself or anyone acting on such Contributor's behalf. Contributions do not include additions to the Program which: (i) are separate modules of software distributed in conjunction with the Program under their own license agreement, and (ii) are not derivative works of the Program.\ -\pard\pardeftab720\sa160 -\cf0 "Contributor" means any person or entity that distributes the Program.\ -"Licensed Patents" mean patent claims licensable by a Contributor which are necessarily infringed by the use or sale of its Contribution alone or when combined with the Program.\ -"Program" means the Contributions distributed in accordance with this Agreement.\ -"Recipient" means anyone who receives the Program under this Agreement, including all Contributors.\ -\pard\pardeftab720\sa160 - -\b \cf0 2. GRANT OF RIGHTS -\b0 \ -\pard\pardeftab720\li960\sa16 -\cf0 a) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, distribute and sublicense the Contribution of such Contributor, if any, and such derivative works, in source code and object code form.\ -b) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free patent license under Licensed Patents to make, use, sell, offer to sell, import and otherwise transfer the Contribution of such Contributor, if any, in source code and object code form. This patent license shall apply to the combination of the Contribution and the Program if, at the time the Contribution is added by the Contributor, such addition of the Contribution causes such combination to be covered by the Licensed Patents. The patent license shall not apply to any other combinations which include the Contribution. No hardware per se is licensed hereunder.\ -c) Recipient understands that although each Contributor grants the licenses to its Contributions set forth herein, no assurances are provided by any Contributor that the Program does not infringe the patent or other intellectual property rights of any other entity. Each Contributor disclaims any liability to Recipient for claims brought by any other entity based on infringement of intellectual property rights or otherwise. As a condition to exercising the rights and licenses granted hereunder, each Recipient hereby assumes sole responsibility to secure any other intellectual property rights needed, if any. For example, if a third party patent license is required to allow Recipient to distribute the Program, it is Recipient's responsibility to acquire that license before distributing the Program.\ -d) Each Contributor represents that to its knowledge it has sufficient copyright rights in its Contribution, if any, to grant the copyright license set forth in this Agreement.\ -\pard\pardeftab720\sa160 - -\b \cf0 3. REQUIREMENTS -\b0 \ -A Contributor may choose to distribute the Program in object code form under its own license agreement, provided that:\ -\pard\pardeftab720\li960\sa16 -\cf0 a) it complies with the terms and conditions of this Agreement; and\ -b) its license agreement:\ -i) effectively disclaims on behalf of all Contributors all warranties and conditions, express and implied, including warranties or conditions of title and non-infringement, and implied warranties or conditions of merchantability and fitness for a particular purpose;\ -ii) effectively excludes on behalf of all Contributors all liability for damages, including direct, indirect, special, incidental and consequential damages, such as lost profits;\ -iii) states that any provisions which differ from this Agreement are offered by that Contributor alone and not by any other party; and\ -iv) states that source code for the Program is available from such Contributor, and informs licensees how to obtain it in a reasonable manner on or through a medium customarily used for software exchange.\ -\pard\pardeftab720\sa160 -\cf0 When the Program is made available in source code form:\ -\pard\pardeftab720\li960\sa16 -\cf0 a) it must be made available under this Agreement; and\ -b) a copy of this Agreement must be included with each copy of the Program.\ -\pard\pardeftab720\sa160 -\cf0 Contributors may not remove or alter any copyright notices contained within the Program.\ -Each Contributor must identify itself as the originator of its Contribution, if any, in a manner that reasonably allows subsequent Recipients to identify the originator of the Contribution.\ -\pard\pardeftab720\sa160 - -\b \cf0 4. COMMERCIAL DISTRIBUTION -\b0 \ -Commercial distributors of software may accept certain responsibilities with respect to end users, business partners and the like. While this license is intended to facilitate the commercial use of the Program, the Contributor who includes the Program in a commercial product offering should do so in a manner which does not create potential liability for other Contributors. Therefore, if a Contributor includes the Program in a commercial product offering, such Contributor ("Commercial Contributor") hereby agrees to defend and indemnify every other Contributor ("Indemnified Contributor") against any losses, damages and costs (collectively "Losses") arising from claims, lawsuits and other legal actions brought by a third party against the Indemnified Contributor to the extent caused by the acts or omissions of such Commercial Contributor in connection with its distribution of the Program in a commercial product offering. The obligations in this section do not apply to any claims or Losses relating to any actual or alleged intellectual property infringement. In order to qualify, an Indemnified Contributor must: a) promptly notify the Commercial Contributor in writing of such claim, and b) allow the Commercial Contributor to control, and cooperate with the Commercial Contributor in, the defense and any related settlement negotiations. The Indemnified Contributor may participate in any such claim at its own expense.\ -For example, a Contributor might include the Program in a commercial product offering, Product X. That Contributor is then a Commercial Contributor. If that Commercial Contributor then makes performance claims, or offers warranties related to Product X, those performance claims and warranties are such Commercial Contributor's responsibility alone. Under this section, the Commercial Contributor would have to defend claims against the other Contributors related to those performance claims and warranties, and if a court requires any other Contributor to pay any damages as a result, the Commercial Contributor must pay those damages.\ - -\b 5. NO WARRANTY -\b0 \ -EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the appropriateness of using and distributing the Program and assumes all risks associated with its exercise of rights under this Agreement , including but not limited to the risks and costs of program errors, compliance with applicable laws, damage to or loss of data, programs or equipment, and unavailability or interruption of operations.\ - -\b 6. DISCLAIMER OF LIABILITY -\b0 \ -EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.\ - -\b 7. GENERAL -\b0 \ -If any provision of this Agreement is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this Agreement, and without further action by the parties hereto, such provision shall be reformed to the minimum extent necessary to make such provision valid and enforceable.\ -If Recipient institutes patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Program itself (excluding combinations of the Program with other software or hardware) infringes such Recipient's patent(s), then such Recipient's rights granted under Section 2(b) shall terminate as of the date such litigation is filed.\ -All Recipient's rights under this Agreement shall terminate if it fails to comply with any of the material terms or conditions of this Agreement and does not cure such failure in a reasonable period of time after becoming aware of such noncompliance. If all Recipient's rights under this Agreement terminate, Recipient agrees to cease use and distribution of the Program as soon as reasonably practicable. However, Recipient's obligations under this Agreement and any licenses granted by Recipient relating to the Program shall continue and survive.\ -Everyone is permitted to copy and distribute copies of this Agreement, but in order to avoid inconsistency the Agreement is copyrighted and may only be modified in the following manner. The Agreement Steward reserves the right to publish new versions (including revisions) of this Agreement from time to time. No one other than the Agreement Steward has the right to modify this Agreement. The Eclipse Foundation is the initial Agreement Steward. The Eclipse Foundation may assign the responsibility to serve as the Agreement Steward to a suitable separate entity. Each new version of the Agreement will be given a distinguishing version number. The Program (including Contributions) may always be distributed subject to the version of the Agreement under which it was received. In addition, after a new version of the Agreement is published, Contributor may elect to distribute the Program (including its Contributions) under the new version. Except as expressly stated in Sections 2(a) and 2(b) above, Recipient receives no rights or licenses to the intellectual property of any Contributor under this Agreement, whether expressly, by implication, estoppel or otherwise. All rights in the Program not expressly granted under this Agreement are reserved.\ -This Agreement is governed by the laws of the State of New York and the intellectual property laws of the United States of America. No party to this Agreement will bring a legal action under this Agreement more than one year after the cause of action arose. Each party waives its rights to a jury trial in any resulting litigation.} diff --git a/DESCRIPTION b/DESCRIPTION index 5730b0a8..9218f30d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,47 +1,60 @@ Package: TRONCO -Version: 1.1.0 -Date: 2014-09-22 +Version: 2.0.0-8 +Date: 2015-07-21 Title: TRONCO, a package for TRanslational ONCOlogy -Author: Marco Antoniotti, Giulio Caravagna, - Alex Graudenzi, Ilya Korsunsky, - Mattia Longoni, Loes Olde Loohuis, - Giancarlo Mauri, Bud Mishra, Daniele Ramazzotti -Maintainer: Giulio Caravagna , - Alex Graudenzi , - Daniele Ramazzotti -Depends: - R (>= 2.10), - methods, +Authors@R: + c(person("Marco", "Antoniotti", role=c("cph")), + person("Giulio", "Caravagna", role=c("aut", "cre"), email="giulio.caravagna@disco.unimib.it"), + person("Luca", "De Sano", role=c("aut"), email="l.desano@campus.unimib.it"), + person("Alex", "Graudenzi", role=c("aut"), email="alex.graudenzi@disco.unimib.it"), + person("Ilya", "Korsunsky", role=c("cph")), + person("Mattia", "Longoni", role=c("ctb")), + person("Loes", "Olde Loohuis", role=c("cph")), + person("Giancarlo", "Mauri", role=c("cph")), + person("Bud", "Mishra", role=c("cph")), + person("Daniele", "Ramazzotti", role=c("aut"), email="daniele.ramazzotti@disco.unimib.it")) +Depends: + R (>= 3.1), + doParallel, + bnlearn, +Imports: Rgraphviz, - lattice, - graph -Description: Genotype-level cancer progression models describe the ordering of - accumulating mutations, e.g., somatic mutations / copy number variations, - during cancer development. These graphical models help understand the - causal structure involving events promoting cancer progression, possibly - predicting complex patterns characterising genomic progression of a cancer. - Reconstructed models can be used to better characterise genotype-phenotype - relation, and suggest novel targets for therapy design. TRONCO - (TRanslational ONCOlogy) is a R package aimed at collecting - state-of-the-art algorithms to infer progression models from - cross-sectional data, i.e., data collected from independent patients which - does not necessarily incorporate any evident temporal information. These - algorithms require a binary input matrix where: (i) each row represents a - patient genome, (ii) each column an event relevant to the progression (a - priori selected) and a 0/1 value models the absence/presence of a certain - mutation in a certain patient. The current first version of TRONCO - implements the CAPRESE algorithm (Cancer PRogression Extraction with Single - Edges) to infer possible progression models arranged as trees; cfr. - Inferring tree causal models of cancer progression with probability - raising, L. Olde Loohuis, G. Caravagna, A. Graudenzi, D. Ramazzotti, G. - Mauri, M. Antoniotti and B. Mishra. PLoS One, to appear. This vignette - shows how to use TRONCO to infer a tree model of ovarian cancer progression - from CGH data of copy number alterations (classified as gains or losses - over chromosome's arms). The dataset used is available in the SKY/M-FISH - database. -License: EPL (>= 1.0) + ggplot2, + RColorBrewer, + reshape2, + cgdsr, + igraph, + grid, + gridExtra, + xtable, + gtable, + scales +Suggests: + BiocGenerics, + BiocStyle, + testthat, + R.matlab +Description: + TRONCO (TRanslational ONCOlogy) is a R package which collects + algorithms to infer progression models from Bernoulli 0/1 profiles of genomic + alterations across a tumor sample. Such profiles are usually visualised as a + binary input matrix where each row represents a patient's sample (e.g., the + result of a sequenced tumor biopsy), and each column an event relevant to the + progression (a certain type of somatic mutation, a focal or higher-level + chromosomal copy number alteration etc.); a 0/1 value models the absence/presence + of that alteration in the sample. In this version of TRONCO such profiles can + be readily imported by boolean matrices and MAF/GISTIC files. The package provides + various functions to editing, visualise and subset such data, as well as functions + to query the Cbio portal for cancer genomics. This version of TRONCO comes with + the parallel implementations the CAPRESE [PLoS ONE 9(12): e115570] and CAPRI + [Bioinformatics, doi:10.1093/bioinformatics/btv296] algorithms to infer possible + progression models arranged as trees, or general direct acyclic graphs. + Bootstrap functions to assess the parametric, non-prametric and statistical + confidence of every inferred model are also provided. The package comes with + some data available as well, which include the dataset of Atypical Chronic Myeloid + Leukemia samples provided by Piazza et al., Nat. Genet., 45 (2013), and examples. +LazyData: TRUE +License: GPL (>= 3.0) URL: http://bimib.disco.unimib.it +BugReports: https://github.com/BIMIB-DISCo/TRONCO biocViews: Cancer -Suggests: - RUnit, - BiocGenerics diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 00000000..5fdf13ca --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,676 @@ +GNU GENERAL PUBLIC LICENSE +========================== +Version 3, 29 June 2007 +========================== + +> Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. + +# Preamble + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + +# TERMS AND CONDITIONS + +## 0. Definitions. + + _"This License"_ refers to version 3 of the GNU General Public License. + + _"Copyright"_ also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + _"The Program"_ refers to any copyrightable work licensed under this +License. Each licensee is addressed as _"you"_. _"Licensees"_ and +"recipients" may be individuals or organizations. + + To _"modify"_ a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a _"modified version"_ of the +earlier work or a work _"based on"_ the earlier work. + + A _"covered work"_ means either the unmodified Program or a work based +on the Program. + + To _"propagate"_ a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To _"convey"_ a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + +## 1. Source Code. + + The _"source code"_ for a work means the preferred form of the work +for making modifications to it. _"Object code"_ means any non-source +form of a work. + + A _"Standard Interface"_ means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The _"System Libraries"_ of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The _"Corresponding Source"_ for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + +## 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + +## 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + +## 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + +## 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + +## 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A _"User Product"_ is either (1) a _"consumer product"_, which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + _"Installation Information"_ for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + +## 7. Additional Terms. + + _"Additional permissions"_ are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + +## 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + +## 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + +## 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An _"entity transaction"_ is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + +## 11. Patents. + + A _"contributor"_ is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's _"essential patent claims"_ are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + +## 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + +## 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + +## 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + +## 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +## 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + +## 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + +# END OF TERMS AND CONDITIONS +-------------------------------------------------------------------------- + + +# How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type 'show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type 'show c' for details. + + The hypothetical commands _'show w'_ and _'show c'_ should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 00000000..20d40b6b --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. \ No newline at end of file diff --git a/NAMESPACE b/NAMESPACE index b80584bd..16b8ec6b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,27 +1,135 @@ -# Generated by roxygen2 (4.0.2): do not edit by hand +# Generated by roxygen2 (4.1.1): do not edit by hand -export(confidence.conditional) -export(confidence.data.conditional) -export(confidence.data.joint) -export(confidence.data.single) -export(confidence.fit.conditional) -export(confidence.fit.joint) -export(confidence.fit.single) -export(confidence.joint) -export(confidence.single) -export(data.load) -export(events.add) -export(events.load) -export(reset) -export(reset.events) -export(reset.types) +export(AND) +export(OR) +export(TCGA.map.clinical.data) +export(TCGA.multiple.samples) +export(TCGA.remove.multiple.samples) +export(TCGA.shorten.barcodes) +export(XOR) +export(annotate.description) +export(annotate.stages) +export(as.adj.matrix) +export(as.alterations) +export(as.colors) +export(as.conditional.probs) +export(as.confidence) +export(as.description) +export(as.error.rates) +export(as.events) +export(as.events.in.patterns) +export(as.events.in.sample) +export(as.gene) +export(as.genes) +export(as.genes.in.patterns) +export(as.genotypes) +export(as.hypotheses) +export(as.joint.probs) +export(as.marginal.probs) +export(as.models) +export(as.pathway) +export(as.patterns) +export(as.samples) +export(as.selective.advantage.relations) +export(as.stages) +export(as.types) +export(as.types.in.patterns) +export(cbio.query) +export(change.color) +export(delete.event) +export(delete.gene) +export(delete.hypothesis) +export(delete.model) +export(delete.pattern) +export(delete.samples) +export(delete.type) +export(duplicates) +export(ebind) +export(enforce.numeric) +export(enforce.string) +export(events.selection) +export(export.mutex) +export(export.nbs.input) +export(extract.MAF.HuGO.Entrez.map) +export(genes.table.plot) +export(genes.table.report) +export(has.duplicates) +export(has.model) +export(has.stages) +export(hypothesis.add) +export(hypothesis.add.group) +export(hypothesis.add.homologous) +export(import.GISTIC) +export(import.MAF) +export(import.genotypes) +export(import.mutex.groups) +export(intersect.datasets) +export(is.compliant) +export(keysToNames) +export(merge.events) +export(merge.types) +export(nevents) +export(ngenes) +export(nhypotheses) +export(npatterns) +export(nsamples) +export(ntypes) +export(oncoprint) +export(oncoprint.cbio) +export(pathway.visualization) +export(pheatmap) +export(rank.recurrents) +export(rename.gene) +export(rename.type) +export(samples.selection) +export(sbind) +export(show) +export(sort.by.frequency) +export(ssplit) +export(trim) export(tronco.bootstrap) -export(tronco.bootstrap.show) export(tronco.caprese) +export(tronco.capri) export(tronco.plot) -export(types.add) -export(types.load) +export(which.samples) import(Rgraphviz) -import(graph) -import(lattice) -import(methods) +import(doParallel) +import(igraph) +importFrom(RColorBrewer,brewer.pal) +importFrom(RColorBrewer,brewer.pal.info) +importFrom(bnlearn,hc) +importFrom(bnlearn,tabu) +importFrom(cgdsr,CGDS) +importFrom(cgdsr,getCancerStudies) +importFrom(cgdsr,getCaseLists) +importFrom(cgdsr,getClinicalData) +importFrom(cgdsr,getGeneticProfiles) +importFrom(cgdsr,getProfileData) +importFrom(ggplot2,geom_bar) +importFrom(ggplot2,ggplot) +importFrom(grid,convertHeight) +importFrom(grid,convertWidth) +importFrom(grid,gList) +importFrom(grid,gTree) +importFrom(grid,gpar) +importFrom(grid,grid.draw) +importFrom(grid,grid.newpage) +importFrom(grid,grid.pretty) +importFrom(grid,grobTree) +importFrom(grid,polylineGrob) +importFrom(grid,rectGrob) +importFrom(grid,textGrob) +importFrom(grid,unit) +importFrom(grid,unit.c) +importFrom(grid,viewport) +importFrom(gridExtra,grid.arrange) +importFrom(gridExtra,grid.table) +importFrom(gtable,gtable) +importFrom(gtable,gtable_add_grob) +importFrom(gtable,gtable_height) +importFrom(gtable,gtable_width) +importFrom(reshape2,melt) +importFrom(scales,brewer_pal) +importFrom(scales,dscale) +importFrom(scales,hue_pal) +importFrom(xtable,xtable) diff --git a/R/as.functions.R b/R/as.functions.R new file mode 100644 index 00000000..481c8304 --- /dev/null +++ b/R/as.functions.R @@ -0,0 +1,1266 @@ +################################################################################## +# # +# TRONCO: a tool for TRanslational ONCOlogy # +# # +################################################################################## +# Copyright (c) 2015, Marco Antoniotti, Giulio Caravagna, Luca De Sano, # +# Alex Graudenzi, Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, # +# Giancarlo Mauri, Bud Mishra and Daniele Ramazzotti. # +# # +# All rights reserved. This program and the accompanying materials # +# are made available under the terms of the GNU GPL v3.0 # +# which accompanies this distribution # +# # +################################################################################## + +#' Return all genotypes for input 'x', which should be a TRONCO compliant dataset +#' see \code{is.compliant}. +#' Function \code{keysToNames} can be used to translate colnames to events. +#' @title as.genotypes +#' +#' @examples +#' data(test_dataset) +#' as.genotypes(test_dataset) +#' +#' @param x A TRONCO compliant dataset. +#' @return A TRONCO genotypes matrix. +#' @export as.genotypes +as.genotypes = function(x) +{ + return(x$genotypes) +} + +#' Return all sample IDs for input 'x', which should be a TRONCO compliant dataset - see \code{is.compliant}. +#' @title as.samples +#' +#' @examples +#' data(test_dataset) +#' as.samples(test_dataset) +#' +#' @param x A TRONCO compliant dataset. +#' @return A vector of sample IDs +#' @export as.samples +as.samples = function(x) +{ + return(rownames(x$genotypes)) +} + +#' Return all gene symbols for which a certain type of event exists in 'x', which should be a +#' TRONCO compliant dataset - see \code{is.compliant}. +#' @title as.genes +#' +#' @examples +#' data(test_dataset) +#' as.genes(test_dataset) +#' +#' @param x A TRONCO compliant dataset. +#' @param types The types of events to consider, if NA all available types are used. +#' @return A vector of gene symbols for which a certain type of event exists +#' @export as.genes +as.genes = function(x, types = NA) +{ + if('Pattern' %in% types) { + stop('Pattern is not a valid gene type, it\'s a keyword reserved in TRONCO.') + } + if(npatterns(x) > 0) + { + ev = as.events(x, types=types) + ev = ev[ which(ev[, 'type'] != 'Pattern'), 'event'] + return(unique(ev)) + } + else + return(unique(as.events(x, types=types)[, 'event'])) +} + +#' Return all events involving certain genes and of a certain type in 'x', which should be a +#' TRONCO compliant dataset - see \code{is.compliant}. +#' @title as.events +#' +#' @examples +#' data(test_dataset) +#' as.events(test_dataset) +#' as.events(test_dataset, types='ins_del') +#' as.events(test_dataset, genes = 'TET2') +#' as.events(test_dataset, types='Missing') +#' +#' @param x A TRONCO compliant dataset. +#' @param types The types of events to consider, if NA all available types are used. +#' @param genes The genes to consider, if NA all available genes are used. +#' @return A matrix with 2 columns (event type, gene name) for the events found. +#' @export as.events +as.events = function(x, genes=NA, types=NA) +{ + ann = x$annotations[, c('type', 'event'), drop=FALSE] + + if(!any(is.na(genes))) ann = ann[ which(ann[, 'event', drop=FALSE] %in% genes) , , drop=FALSE] + if(!any(is.na(types))) ann = ann[ which(ann[, 'type', drop=FALSE] %in% types) , , drop=FALSE] + + return(ann) +} + +#' Return the association sample -> stage, if any. Input 'x' should be a +#' TRONCO compliant dataset - see \code{is.compliant}. +#' @title as.stages +#' +#' @examples +#' data(test_dataset) +#' data(stage) +#' test_dataset = annotate.stages(test_dataset, stage) +#' as.stages(test_dataset) +#' +#' @param x A TRONCO compliant dataset. +#' @return A matrix with 1 column annotating stages and rownames as sample IDs. +#' @export as.stages +as.stages = function(x) +{ + if(has.stages(x)) { + return(x$stages) + } + return(NA) +} + +#' Return the types of events for a set of genes which are in 'x', which should be a +#' TRONCO compliant dataset - see \code{is.compliant}. +#' @title as.types +#' +#' @examples +#' data(test_dataset) +#' as.types(test_dataset) +#' as.types(test_dataset, genes='TET2') +#' +#' @param x A TRONCO compliant dataset. +#' @param genes A list of genes to consider, if NA all genes are used. +#' @return A matrix with 1 column annotating stages and rownames as sample IDs. +#' @export as.types +as.types = function(x, genes=NA) +{ + return(unlist(unique(as.events(x, genes=genes)[, 'type']))) +} + +#' Return the colors associated to each type of event in 'x', which should be a +#' TRONCO compliant dataset - see \code{is.compliant}. +#' @title as.colors +#' +#' @examples +#' data(test_dataset) +#' as.colors(test_dataset) +#' +#' @param x A TRONCO compliant dataset. +#' @return A named vector of colors. +#' @export as.colors +as.colors = function(x) +{ + return(x$types[, 'color']) +} + +#' Return the genotypes for a certain set of genes and type of events. Input 'x' should be a +#' TRONCO compliant dataset - see \code{is.compliant}. In this case column names are substituted +#' with events' types. +#' @title as.gene +#' +#' @examples +#' data(test_dataset) +#' as.gene(test_dataset, genes = c('EZH2', 'ASXL1')) +#' +#' @param x A TRONCO compliant dataset. +#' @param types The types of events to consider, if NA all available types are used. +#' @param genes The genes to consider, if NA all available genes are used. +#' @return A matrix, subset of \code{as.genotypes(x)} with colnames substituted with events' types. +#' @export as.gene +as.gene = function(x, genes, types=NA) +{ + keys = as.events(x, genes=genes, types=types) + + data = data.frame(x$genotypes[, rownames(keys)], row.names = as.samples(x)) + colnames(data) = apply(keys, 1, FUN = paste, collapse = ' ') + + return(data) +} + + +#' Return a dataset where all events for a gene are merged in a unique event, i.e., +#' a total of gene-level alterations diregarding the event type. Input 'x' is checked +#' to be a TRONCO compliant dataset - see \code{is.compliant}. +#' @title as.alterations +#' +#' @examples +#' data(muts) +#' as.alterations(muts) +#' +#' @param x A TRONCO compliant dataset. +#' @param new.type The types label of the new event type, 'Alteration' by default. +#' @param new.color The color of the event \code{new.type}, default 'khaki'. +#' @return A TRONCO compliant dataset with alteration profiles. +#' @export as.alterations +as.alterations = function(x, new.type = 'Alteration', new.color = 'khaki') { + is.compliant(x) + merge.types(x, NULL, new.type = new.type, new.color = new.color) +} + +#' Return the patterns in the dataset which constitute CAPRI's hypotheses. +#' @title as.patterns +#' +#' @examples +#' data(test_dataset) +#' as.patterns(test_dataset) +#' +#' @param x A TRONCO compliant dataset. +#' @return The patterns in the dataset which constitute CAPRI's hypotheses. +#' @export as.patterns +as.patterns = function(x) +{ + if(length(x$hypotheses) == 0 || is.na(x$hypotheses)) { + return(NULL) + } + + is.compliant(x) + if ('hstructure' %in% names(x$hypotheses)) { + return(ls(x$hypotheses$hstructure)) + } + return(NULL) +} + +#' Return the hypotheses in the dataset which constitute CAPRI's hypotheses. +#' @title as.hypotheses +#' +#' @examples +#' data(test_dataset) +#' as.hypotheses(test_dataset) +#' +#' @param x A TRONCO compliant dataset. +#' @param cause A list of genes to use as causes +#' @param effect A list of genes to use as effects +#' @return The hypotheses in the dataset which constitute CAPRI's hypotheses. +#' @export as.hypotheses +as.hypotheses = function(x, cause=NA, effect=NA) +{ + if (nhypotheses(x) < 1) + return(NULL) + + hlist = x$hypotheses$hlist + + list_c = x$annotations[hlist[,'cause'],c('type', 'event'), drop=F] + colnames(list_c) = c('cause type', 'cause event') + rownames(list_c) = NULL + list_e = x$annotations[hlist[,'effect'],c('type', 'event'), drop=F] + colnames(list_e) = c('effect type', 'effect event') + rownames(list_e) = NULL + + filtered_list = cbind(list_c, list_e) + + if(!is.na(cause)) { + if(all(cause %in% as.events(x)[,'event'])) { + filtered_list = filtered_list[which(filtered_list[,'cause event'] == cause), ] + } else { + stop('some cause not in as.events\n') + } + } + + if(!is.na(effect)) { + if(all(effect %in% as.events(x)[,'event'])) { + filtered_list = filtered_list[which(filtered_list[,'effect event'] == effect), ] + } else { + stop('some effect not in as.events\n') + } + } + + return(filtered_list) +} + +#' Return the list of events present in selected patterns +#' +#' @examples +#' data(test_dataset) +#' as.events.in.patterns(test_dataset) +#' as.events.in.patterns(test_dataset, patterns='XOR_EZH2') +#' +#' @title as.events.in.patterns +#' @param x A TRONCO compliant dataset. +#' @param patterns A list of patterns for which the list will be returned +#' @return A list of events present in patterns which consitute CAPRI's hypotheses +#' @export as.events.in.patterns +as.events.in.patterns = function(x, patterns=NULL) +{ + is.compliant(x) + ann = x$annotations[, c('type', 'event'), drop=FALSE] + if (is.null(patterns)) { + patterns = as.patterns(x) + } + + genes_list = NULL + for(h in patterns) { + if(!h %in% as.patterns(x)) { + stop('Hypothesis ', h, ' not in as.patterns(x)') + } + + g = lapply(colnames(x$hypotheses$hstructure[[h]]), function(x){ if(!is.logic.node(x))return(x)}) + genes_list = append(genes_list, g) + } + genes_list = unique(unlist(genes_list)) + + if(!(is.null(genes_list))) ann = ann[ which(rownames(ann) %in% genes_list) , , drop=FALSE] + + return(ann) +} + +#' Return the list of genes present in selected patterns +#' +#' @examples +#' data(test_dataset) +#' as.genes.in.patterns(test_dataset) +#' as.genes.in.patterns(test_dataset, patterns='XOR_EZH2') +#' +#' @title as.genes.in.patterns +#' @param x A TRONCO compliant dataset. +#' @param patterns A list of patterns for which the list will be returned +#' @return A list of genes present in patterns which consitute CAPRI's hypotheses +#' @export as.genes.in.patterns +as.genes.in.patterns = function(x, patterns=NULL) { + events = as.events.in.patterns(x, patterns) + genes = unique(events[,'event']) + return(genes) +} + +#' Return the list of types present in selected patterns +#' +#' @examples +#' data(test_dataset) +#' as.types.in.patterns(test_dataset) +#' as.types.in.patterns(test_dataset, patterns='XOR_EZH2') +#' +#' @title as.types.in.patterns +#' @param x A TRONCO compliant dataset. +#' @param patterns A list of patterns for which the list will be returned +#' @return A list of types present in patterns which consitute CAPRI's hypotheses +#' @export as.types.in.patterns +as.types.in.patterns = function(x, patterns=NULL) { + events = as.events.in.patterns(x, patterns) + types = unique(events[,'type']) + return(types) +} + +#' Return a list of events which are observed in the input samples list +#' +#' @examples +#' data(test_dataset) +#' as.events.in.sample(test_dataset, c('patient 1', 'patient 7')) +#' +#' @title as.events.in.sample +#' @param x A TRONCO compliant dataset +#' @param sample Vector of sample names +#' @return A list of events which are observed in the input samples list +#' @export as.events.in.sample +as.events.in.sample = function(x, sample) +{ + aux = function(s) + { + sub.geno = as.genotypes(x)[s, , drop = FALSE] + sub.geno = sub.geno[, sub.geno == 1, drop = FALSE] + return(as.events(x)[colnames(sub.geno), , drop = FALSE]) + } + + return(sapply(sample, FUN = aux)) +} + +#' Return confidence information for a TRONCO model. Available information are: temporal priority (tp), +#' probability raising (pr), hypergeometric test (hg), parametric (pb), non parametric (npb) or +#' statistical (sb) bootstrap. +#' Confidence is available only once a model has been reconstructed with any of the algorithms implemented +#' in TRONCO. If more than one model has been reconstructed - for instance via multiple regularizations - +#' confidence information is appropriately nested. The requested confidence is specified via +#' vector parameter \code{conf}. +#' +#' @examples +#' data(test_model) +#' as.confidence(test_model, conf='tp') +#' as.confidence(test_model, conf=c('tp', 'hg')) +#' +#' @title as.confidence +#' @param x A TRONCO model. +#' @param conf A vector with any of 'tp', 'pr', 'hg', 'npb', 'pb' or 'sb'. +#' @return A list of matrices with the event-to-event confidence. +#' @export as.confidence +as.confidence = function(x, conf) +{ + is.compliant(x) + is.model(x) + if(!is.vector(conf)) stop('"conf" should be a vector.') + + keys = c('hg', 'tp', 'pr', 'npb', 'pb', 'sb') + + if(!all(conf %in% keys)) + stop('Confidence keyword unrecognized, \'conf\' should be any of:\n + INPUT DATASET\n + \t \"hg\" - hypergeometric test (randomness of observations)\n + SELECTIVE ADVANTAGE SCORES\n + \t \"tp\" - temporal priority (temporal ordering of events)\n + \t \"pr\" - probability raising (selectivity among events)\n + MODEL CONFIDENCE - requires post-reconstruction bootstrap\n + \t \"npb\" - non-parametric bootstrap,\n + \t \"pb\" - parametric bootstrap\n + \t \"sb\" - statistical bootstrap\n' + ) + + if(is.null(x$confidence) || is.na(x$confidence) || is.null(x$model) || is.na(x$model)) + stop('Input \'x\' does not contain a TRONCO model. No confidence to show.\n') + + models = names(x$model) + + has.npb.bootstrap = is.null(x$bootstrap[[models[1]]]$npb) + has.pb.bootstrap = is.null(x$bootstrap[[models[1]]]$pb) + has.sb.bootstrap = is.null(x$bootstrap[[models[1]]]$sb) + + if( 'npb' %in% conf && has.npb.bootstrap) { + stop('Non-parametric bootstrap was not performed. Remove keyword\n') + } + + if( 'pb' %in% conf && has.pb.bootstrap) { + stop('Parametric bootstrap was not performed. Remove keyword\n') + } + + if( 'sb' %in% conf && has.sb.bootstrap) { + stop('Statistical bootstrap was not performed. Remove keyword\n') + } + + result = NULL + + if('hg' %in% conf) { + result$hg = x$confidence['hypergeometric test', ][[1]] + } + if('tp' %in% conf) { + result$tp = x$confidence['temporal priority', ][[1]] + } + if('pr' %in% conf) { + result$pr = x$confidence['probability raising', ][[1]] + } + if('npb' %in% conf) { + for(i in 1:length(models)) { + result$npb[models[i]] = list(x$bootstrap[[models[i]]]$npb$bootstrap.edge.confidence) + } + } + if('pb' %in% conf) { + for(i in 1:length(models)) { + result$pb[models[i]] = list(x$bootstrap[[models[i]]]$pb$bootstrap.edge.confidence) + } + } + + if('sb' %in% conf) { + for(i in 1:length(models)) { + result$sb[models[i]] = list(x$bootstrap[[models[i]]]$sb$bootstrap.edge.confidence) + } + } + return(result) +} + +#' Extract the models from a reconstructed object. +#' +#' @examples +#' data(test_model) +#' as.models(test_model) +#' +#' @title as.models +#' @param x A TRONCO model. +#' @param models The name of the models to extract, e.g. 'bic', 'aic', 'caprese', all by default. +#' @return The models in a reconstructed object. +#' @export as.models +as.models = function(x, models=names(x$model)) +{ + is.compliant(x) + is.model(x) + if(!is.vector(models)) { + stop('"models" should be a vector.') + } + + return(x$model[models]) +} + +#' Return the description annotating the dataset, if any. Input 'x' should be +#' a TRONCO compliant dataset - see \code{is.compliant}. +#' +#' @examples +#' data(test_dataset) +#' as.description(test_dataset) +#' +#' @title as.description +#' @param x A TRONCO compliant dataset. +#' @return The description annotating the dataset, if any. +#' @export as.description +as.description = function(x) +{ + if(!is.null(x$name)) + return(x$name) + return("") +} + +#' Given a cohort and a pathway, return the cohort with events restricted to genes +#' involved in the pathway. This might contain a new 'pathway' genotype with an alteration mark if +#' any of the involved genes are altered. +#' +#' @examples +#' data(test_dataset) +#' p = as.pathway(test_dataset, c('ASXL1', 'TET2'), 'test_pathway') +#' +#' @title as.pathway +#' @param x A TRONCO compliant dataset. +#' @param pathway.genes Gene (symbols) involved in the pathway. +#' @param pathway.name Pathway name for visualization. +#' @param pathway.color Pathway color for visualization. +#' @param aggregate.pathway If TRUE drop the events for the genes in the pathway. +#' @return Extract the subset of events for genes which are part of a pathway. +#' @export as.pathway +as.pathway <- function(x, + pathway.genes, + pathway.name, + pathway.color='yellow', + aggregate.pathway = TRUE) +{ + is.compliant(x, 'as.pathway: input') + data = x$genotypes + cat(paste('*** Extracting events for pathway: ', pathway.name,'.\n', sep='')) + + # Select only those events involving a gene in pathway.genes which is also in x + y = events.selection(x, NA, filter.in.names=pathway.genes, NA) + + # Extend genotypes + y = enforce.numeric(y) + + pathway = data.frame(rowSums(as.genotypes(y)), row.names = as.samples(y), stringsAsFactors = FALSE) + pathway[pathway > 1, ] = 1 + colnames(pathway) = pathway.name + + pathway = import.genotypes(pathway, event.type = 'Pathway', color = pathway.color) + + cat('Pathway extracted succesfully.\n') + + if(!aggregate.pathway) { + pathway = ebind(pathway, y) + } + + if(has.stages(y)) { + pathway = annotate.stages(pathway, as.stages(y)) + } + + is.compliant(pathway, 'as.pathway: output') + + return(pathway) +} + +#' Extract the adjacency matrix of a TRONCO model. The matrix is indexed with colnames/rownames which +#' represent genotype keys - these can be resolved with function \code{keysToNames}. It is possible to +#' specify a subset of events to build the matrix, a subset of models if multiple reconstruction have +#' been performed. Also, either the prima facie matrix or the post-regularization matrix can be extracted. +#' +#' @examples +#' data(test_model) +#' as.adj.matrix(test_model) +#' as.adj.matrix(test_model, events=as.events(test_model)[5:15,]) +#' as.adj.matrix(test_model, events=as.events(test_model)[5:15,], type='pf') +#' +#' @title as.adj.matrix +#' @param x A TRONCO model. +#' @param events A subset of events as of \code{as.events(x)}, all by default. +#' @param models A subset of reconstructed models, all by default. +#' @param type Either the prima facie ('pf') or the post-regularization ('fit') matrix, 'fit' by default. +#' @return The adjacency matrix of a TRONCO model. +#' @export as.adj.matrix +as.adj.matrix = function(x, events = as.events(x), models = names(x$model), type = 'fit') +{ + is.compliant(x) + is.model(x) + is.events.list(x, events) + + if(!is.vector(models)) { + stop('"models" should be a vector.') + } + if(!type %in% c('fit', 'pf') ) { + stop('"type" should be any of \'fit\' (post-regularization) or \'pf\' (prima facie).') + } + m = as.models(x, models = models) + + ret = list() + for(i in models) + { + if(type == 'fit') mat = m[[i]]$adj.matrix$adj.matrix.fit + if(type == 'pf') mat = m[[i]]$adj.matrix$adj.matrix.pf + + mat = mat[rownames(events), , drop = FALSE] + mat = mat[, rownames(events), drop = FALSE] + + ret = append(ret, list(mat)) + } + + names(ret) = models + return(ret) +} + +#' Extract the marginal probabilities from a TRONCO model. The return matrix is indexed with rownames which +#' represent genotype keys - these can be resolved with function \code{keysToNames}. It is possible to +#' specify a subset of events to build the matrix, a subset of models if multiple reconstruction have +#' been performed. Also, either the observed or fit probabilities can be extracted. +#' +#' @examples +#' data(test_model) +#' as.marginal.probs(test_model) +#' as.marginal.probs(test_model, events=as.events(test_model)[5:15,]) +#' +#' @title as.marginal.probs +#' @param x A TRONCO model. +#' @param events A subset of events as of \code{as.events(x)}, all by default. +#' @param models A subset of reconstructed models, all by default. +#' @param type Either observed ('observed') or fit ('fit') probabilities, 'observed' by default. +#' @return The marginal probabilities in a TRONCO model. +#' @export as.marginal.probs +as.marginal.probs = function(x, events = as.events(x), models = names(x$model), type = 'observed') +{ + is.compliant(x) + is.model(x) + is.events.list(x, events) + + if(!type %in% c('observed', 'fit') ) { + stop('Marginal probabilities are available for \'observed\' (empirical) or \'fit\' (estimated).') + } + if(any(is.null(colnames(events)))) { + stop('Events should have rownames to access the adjacency matrix - use \'as.events\' function?') + } + + m = as.models(x, models = models) + + ret = list() + for(i in models) + { + if(type == 'observed') mat = m[[i]]$probabilities$probabilities.observed$marginal.probs + if(type == 'fit') mat = m[[i]]$probabilities$probabilities.fit$estimated.marginal.probs + + if(type == 'fit' && is.na(mat)) stop('Marginal probabilities have not been estimated yet - see TRONCO Manual.') + + mat = mat[rownames(events), , drop = FALSE] + ret = append(ret, list(mat)) + } + + names(ret) = models + return(ret) +} + +#' Extract the joint probabilities from a TRONCO model. The return matrix is indexed with rownames/colnames which +#' represent genotype keys - these can be resolved with function \code{keysToNames}. It is possible to +#' specify a subset of events to build the matrix, a subset of models if multiple reconstruction have +#' been performed. Also, either the observed or fit probabilities can be extracted. +#' +#' @examples +#' data(test_model) +#' as.joint.probs(test_model) +#' as.joint.probs(test_model, events=as.events(test_model)[5:15,]) +#' +#' @title as.joint.probs +#' @param x A TRONCO model. +#' @param events A subset of events as of \code{as.events(x)}, all by default. +#' @param models A subset of reconstructed models, all by default. +#' @param type Either observed ('observed') or fit ('fit') probabilities, 'observed' by default. +#' @return The joint probabilities in a TRONCO model. +#' @export as.joint.probs +as.joint.probs = function(x, events = as.events(x), models = names(x$model), type = 'observed') +{ + is.compliant(x) + is.model(x) + is.events.list(x, events) + + if(!type %in% c('observed', 'fit') ) { + stop('Joint probabilities are available for \'observed\' (empirical) or \'fit\' (estimated).') + } + if(any(is.null(colnames(events)))) { + stop('Events should have rownames to access the adjacency matrix - use \'as.events\' function?') + } + + m = as.models(x, models = models) + + ret = list() + for(i in models) + { + if(type == 'observed') mat = m[[i]]$probabilities$probabilities.observed$joint.probs + if(type == 'fit') mat = m[[i]]$probabilities$probabilities.fit$estimated.joint.probs + + if(type == 'fit' && is.na(mat)) stop('Joint probabilities have not been estimated yet - see TRONCO Manual.') + + mat = mat[rownames(events), , drop = FALSE] + mat = mat[, rownames(events), drop = FALSE] + ret = append(ret, list(mat)) + } + + names(ret) = models + return(ret) +} + +#' Extract the conditional probabilities from a TRONCO model. The return matrix is indexed with rownames which +#' represent genotype keys - these can be resolved with function \code{keysToNames}. It is possible to +#' specify a subset of events to build the matrix, a subset of models if multiple reconstruction have +#' been performed. Also, either the observed or fit probabilities can be extracted. +#' +#' @title as.conditional.probs +#' @param x A TRONCO model. +#' @param events A subset of events as of \code{as.events(x)}, all by default. +#' @param models A subset of reconstructed models, all by default. +#' @param type Either observed ('observed') or fit ('fit') probabilities, 'observed' by default. +#' @return The conditional probabilities in a TRONCO model. +#' @export as.conditional.probs +as.conditional.probs = function(x, events = as.events(x), models = names(x$model), type = 'observed') +{ + is.compliant(x) + is.model(x) + is.events.list(x, events) + + if(!type %in% c('observed', 'fit') ) { + stop('Conditional probabilities are available for \'observed\' (empirical) or \'fit\' (estimated).') + } + if(any(is.null(colnames(events)))) { + stop('Events should have rownames to access the adjacency matrix - use \'as.events\' function?') + } + + m = as.models(x, models = models) + + ret = list() + for(i in models) + { + if(type == 'observed') mat = m[[i]]$probabilities$probabilities.observed$conditional.probs + if(type == 'fit') mat = m[[i]]$probabilities$probabilities.fit$estimated.conditional.probs + + if(type == 'fit' && is.na(mat)) stop('Conditional probabilities have not been estimated yet - see TRONCO Manual.') + + mat = mat[rownames(events), , drop = FALSE] + ret = append(ret, list(mat)) + } + + names(ret) = models + return(ret) +} + +#' Extract the estimated rates of false positives an negatives in the data, given the model. +#' A subset of models if multiple reconstruction have been performed can be extracted. +#' +#' @examples +#' data(test_model) +#' as.error.rates(test_model) +#' +#' @title as.error.rates +#' @param x A TRONCO model. +#' @param models A subset of reconstructed models, all by default. +#' @return The estimated rates of false positives an negatives in the data, given the model. +#' @export as.error.rates +as.error.rates = function(x, models = names(x$model)) +{ + is.compliant(x) + is.model(x) + + m = as.models(x, models = models) + + ret = list() + for(i in models) + { + mat = m[[i]]$error.rates + ret = append(ret, list(mat)) + } + + names(ret) = models + return(ret) +} + +#' Returns a dataframe with all the selective advantage relations in a +#' TRONCO model. Confidence is also shown - see \code{as.confidence}. It is possible to +#' specify a subset of events or models if multiple reconstruction have +#' been performed. +#' +#' @examples +#' data(test_model) +#' as.selective.advantage.relations(test_model) +#' as.selective.advantage.relations(test_model, events=as.events(test_model)[5:15,]) +#' as.selective.advantage.relations(test_model, events=as.events(test_model)[5:15,], type='pf') +#' +#' @title as.selective.advantage.relations +#' @param x A TRONCO model. +#' @param events A subset of events as of \code{as.events(x)}, all by default. +#' @param models A subset of reconstructed models, all by default. +#' @param type Either Prima Facie ('pf') or fit ('fit') probabilities, 'fit' by default. +#' @return All the selective advantage relations in a TRONCO model +#' @export as.selective.advantage.relations +as.selective.advantage.relations = function(x, events = as.events(x), models = names(x$model), type = 'fit') +{ + is.compliant(x) + is.model(x) + is.events.list(x, events) + + # TEMPORARY HORRIBLE FIX + matrix = NULL + if(type == 'pf') matrix$pf = x$adj.matrix.prima.facie + else matrix = as.adj.matrix(x, events = events, models = models, type = type) + matrix = lapply(matrix, keysToNames, x = x) + + conf = as.confidence(x, conf = c('tp', 'pr', 'hg')) + conf = lapply(conf, keysToNames, x = x) + + matrix.to.df = function(m) + { + entries = length(which(m == 1)) + df = NULL + df$SELECTS = NULL + df$SELECTED = NULL + df$OBS.SELECTS = NULL + df$OBS.SELECTED = NULL + df$HG = NULL + df$TP = NULL + df$PR = NULL + + if(entries == 0) { + return(NULL) + } + + for(i in 1:ncol(m)) { + for(j in 1:nrow(m)) { + if(m[i,j] == 1) + { + df$SELECTS = c(df$SELECTS, rownames(m)[i]) + df$SELECTED = c(df$SELECTED, colnames(m)[j]) + + df$OBS.SELECTS = c(df$OBS.SELECTS, sum(as.genotypes(x)[, nameToKey(x, rownames(m)[i])])) + df$OBS.SELECTED = c(df$OBS.SELECTED, sum(as.genotypes(x)[, nameToKey(x, colnames(m)[j])])) + + df$TP = c(df$TP, conf$tp[rownames(m)[i], colnames(m)[j]]) + df$PR = c(df$PR, conf$pr[rownames(m)[i], colnames(m)[j]]) + df$HG = c(df$HG, conf$hg[rownames(m)[i], colnames(m)[j]]) + } + } + } + + df = cbind(df$SELECTS, df$SELECTED, df$OBS.SELECTS, df$OBS.SELECTED, df$TP, df$PR, df$HG) + + colnames(df) = c('SELECTS', 'SELECTED', 'OBS.SELECTS', 'OBS.SELECTED', 'TEMPORAL.PRIORITY', 'PROBABILITY.RAISING', 'HYPERGEOMETRIC') + rownames(df) = paste(1:nrow(df)) + + df = data.frame(df, stringsAsFactors = FALSE) + df$OBS.SELECTS = as.numeric(df$OBS.SELECTS) + df$OBS.SELECTED = as.numeric(df$OBS.SELECTED) + df$HYPERGEOMETRIC = as.numeric(df$HYPERGEOMETRIC) + df$TEMPORAL.PRIORITY = as.numeric(df$TEMPORAL.PRIORITY) + df$PROBABILITY.RAISING = as.numeric(df$PROBABILITY.RAISING) + + return(df) + } + + + return(lapply(matrix, matrix.to.df)) +} + +#' Get parents for each node +#' +#' @title as.parents.pos +#' @param x A TRONCO model. +#' @param events A subset of events as of \code{as.events(x)}, all by default. +#' @param models A subset of reconstructed models, all by default. +#' @return A list of parents for each node +as.parents.pos = function(x, events = as.events(x), models = names(x$model)) +{ + is.compliant(x) + is.model(x) + is.events.list(x, events) + + m = as.models(x, models = models) + + ret = list() + for(i in models) + { + mat = m[[i]]$parents.pos + mat = mat[rownames(events), , drop = FALSE] + ret = append(ret, list(mat)) + } + + names(ret) = models + return(ret) +} + +#' Return true if the TRONCO dataset 'x', which should be a TRONCO compliant dataset +#' - see \code{is.compliant} - has stage annotations for samples. Some sample stages +#' might be annotated as NA, but not all. +#' +#' @examples +#' data(test_dataset) +#' has.stages(test_dataset) +#' data(stage) +#' test_dataset = annotate.stages(test_dataset, stage) +#' has.stages(test_dataset) +#' +#' @title has stages +#' @param x A TRONCO compliant dataset. +#' @return TRUE if the TRONCO dataset has stage annotations for samples. +#' @export has.stages +has.stages = function(x) +{ + return(!(all(is.null(x$stages)) || all(is.na(x$stages)))) +} + + +#' Return true if there are duplicated events in the TRONCO dataset 'x', which should be +#' a TRONCO compliant dataset - see \code{is.compliant}. Events are identified by a gene +#' name, e.g., a HuGO_Symbol, and a type label, e.g., c('SNP', 'KRAS') +#' +#' @examples +#' data(test_dataset) +#' has.duplicates(test_dataset) +#' +#' @title has.duplicates +#' @param x A TRONCO compliant dataset. +#' @return TRUE if there are duplicated events in \code{x}. +#' @export has.duplicates +has.duplicates = function(x) { +# find duplicate over the dataset + dup = duplicated(as.events(x)) + +# return true if at least one duplicate is found + return(any(dup)) +} + +#' Return true if there is a reconstructed model in the TRONCO dataset 'x', which should be +#' a TRONCO compliant dataset - see \code{is.compliant}. +#' +#' @examples +#' data(test_dataset) +#' has.model(test_dataset) +#' +#' @title has.model +#' @param x A TRONCO compliant dataset. +#' @return TRUE if there is a reconstructed model in \code{x}. +#' @export has.model +has.model = function(x) { + is.compliant(x) + if(length(x$model) > 0) + return(TRUE) + return(FALSE) +} + +#' Return the events duplicated in \code{x}, if any. Input 'x' should be +#' a TRONCO compliant dataset - see \code{is.compliant}. +#' +#' @examples +#' data(test_dataset) +#' duplicates(test_dataset) +#' +#' @title duplicates +#' @param x A TRONCO compliant dataset. +#' @return A subset of \code{as.events(x)} with duplicated events. +#' @export duplicates +duplicates = function(x) { + is.compliant(x) + return(as.events(x)[duplicated(as.events(x)),]) +} + + + +#' Print to console a short report of a dataset 'x', which should be +#' a TRONCO compliant dataset - see \code{is.compliant}. +#' +#' @examples +#' data(test_dataset) +#' show(test_dataset) +#' +#' @title show +#' @param x A TRONCO compliant dataset. +#' @param view The firse \code{view} events are shown via \code{head}. +#' @export show +show = function(x, view = 10) +{ + is.compliant(x) + x = enforce.numeric(x) + view = min(view, nevents(x)) + + if(as.description(x) != "") + cat(paste('Description: ', as.description(x), '.\n', sep='')) + + + cat(paste('Dataset: n=', nsamples(x), ', m=', nevents(x), ', |G|=', ngenes(x), '.\n', sep='')) + cat(paste('Events (types): ', paste(as.types(x), collapse=', '), '.\n', sep='')) + cat(paste('Colors (plot): ', paste(as.colors(x), collapse=', '), '.\n', sep='')) + + if(has.stages(x)) + { + cat(paste('Stages: ')) + + s = unlist(sort(unique(as.stages(x)[, 1]))) + cat((paste(paste(s, collapse=', ', sep=''), '.\n', sep=''))) + } + + cat(paste('Events (', view, ' shown):\n', sep='')) + to.show = paste( '\t', + rownames(as.events(x)[1: view,]), ':', + as.events(x)[1: view, 1], as.events(x)[1: view, 2], sep=' ') + + cat(paste(to.show, collapse = '\n')) + + cat(paste('\nGenotypes (', view, ' shown):\n', sep='')) + print(head(x$genotypes[,1:view, drop=FALSE])) +} + +#' Return the number of types in the dataset. +#' +#' @examples +#' data(test_dataset) +#' ntypes(test_dataset) +#' +#' @title ntypes +#' @param x A TRONCO compliant dataset. +#' @return The number of types in the dataset. +#' @export ntypes +ntypes = function(x) +{ + return(length(as.types(x))) +} + +#' Return the number of samples in the dataset. +#' +#' @examples +#' data(test_dataset) +#' nsamples(test_dataset) +#' +#' @title nsamples +#' @param x A TRONCO compliant dataset. +#' @return The number of samples in the dataset. +#' @export nsamples +nsamples = function(x) +{ + return(nrow(x$genotypes)) +} + +#' Return the number of events in the dataset involving a certain gene or type of event. +#' +#' @examples +#' data(test_dataset) +#' nevents(test_dataset) +#' +#' @title nevents +#' @param x A TRONCO compliant dataset. +#' @param genes The genes to consider, if NA all available genes are used. +#' @param types The types of events to consider, if NA all available types are used. +#' @return The number of events in the dataset involving a certain gene or type of event. +#' @export nevents +nevents = function(x, genes=NA, types=NA) +{ + return(nrow(as.events(x, genes, types))) +} + +#' Return the number of genes in the dataset involving a certain type of event. +#' +#' @examples +#' data(test_dataset) +#' ngenes(test_dataset) +#' +#' @title ngenes +#' @param x A TRONCO compliant dataset. +#' @param types The types of events to consider, if NA all available types are used. +#' @return The number of genes in the dataset involving a certain type of event. +#' @export ngenes +ngenes = function(x, types=NA) +{ + return(length(as.genes(x, types=types))) +} + +#' Return the number of patterns in the dataset +#' +#' @examples +#' data(test_dataset) +#' npatterns(test_dataset) +#' +#' +#' @param x the dataset. +#' @export npatterns +npatterns = function(x) +{ + if(any(is.null(x$hypotheses))) { + return(0) + } + + if ('hstructure' %in% names(x$hypotheses)) { + return(length(ls(x$hypotheses$hstructure))) + } + return(0) +} + +#' Return the number of hypotheses in the dataset +#' +#' @examples +#' data(test_dataset) +#' nhypotheses(test_dataset) +#' +#' @param x the dataset. +#' @export nhypotheses +nhypotheses = function(x) +{ + if(npatterns(x) < 1) { + return(0) + } + + if ('hlist' %in% names(x$hypotheses)) { + return(length(x$hypotheses$hlist) / 2) + } + return(0) +} + +#' Convert the internal reprensentation of genotypes to numeric, if not. +#' +#' @examples +#' data(test_dataset) +#' test_dataset = enforce.numeric(test_dataset) +#' +#' @title enforce.numeric +#' @param x A TRONCO compliant dataset. +#' @return Convert the internal reprensentation of genotypes to numeric, if not. +#' @export enforce.numeric +enforce.numeric = function(x) +{ + is.compliant(x) + if(!all(is.numeric(x$genotypes[1,]))) + { + rn = as.samples(x) + x$genotypes = apply(x$genotypes, 2, as.numeric) + rownames(x$genotypes) = rn + } + + return(x) +} + +#' Convert the internal representation of genotypes to character, if not. +#' +#' @examples +#' data(test_dataset) +#' test_dataset = enforce.string(test_dataset) +#' +#' @title enforce.string +#' @param x A TRONCO compliant dataset. +#' @return Convert the internal reprensentation of genotypes to character, if not. +#' @export enforce.string +enforce.string = function(x) +{ + is.compliant(x) + if(!all(is.character(x$genotypes[1,]))) + { + rn = as.samples(x) + x$genotypes = apply(x$genotypes, 2, as.character) + rownames(x$genotypes) = rn + } + + return(x) +} + +#' Sort the internal genotypes according to event frequency. +#' +#' @examples +#' data(test_dataset) +#' sort.by.frequency(test_dataset) +#' +#' @title sort.by.frequency +#' @param x A TRONCO compliant dataset. +#' @param decreasing Inverse order. Default TRUE +#' @param ... just for compatibility +#' @return A TRONCO compliant dataset with the internal genotypes sorted according to event frequency. +#' @export sort.by.frequency +sort.by.frequency = function(x, decreasing=TRUE, ...) +{ + other.argument = list(...) + + is.compliant(x) + + x = enforce.numeric(x) + sums = colSums(x$genotypes) + + x$genotypes = x$genotypes[, order(sums, decreasing = decreasing), drop=F] + x$annotations = x$annotations[colnames(x$genotypes), , drop=F] + + return(x) +} + +#' Convert colnames/rownames of a matrix into intelligible event names, e.g., change a key G23 in 'Mutation KRAS'. +#' If a name is not found, the original name is left unchanged. +#' +#' @examples +#' data(test_model) +#' adj_matrix = as.adj.matrix(test_model, events=as.events(test_model)[5:15,])$bic +#' keysToNames(test_model, adj_matrix) +#' +#' +#' @title keysToNames +#' @param x A TRONCO compliant dataset. +#' @param matrix A matrix with colnames/rownames which represent genotypes keys. +#' @return The matrix with intelligible colnames/rownames. +#' @export keysToNames +keysToNames = function(x, matrix) +{ + is.compliant(x) + if(!is.matrix(matrix) || + any(is.null(colnames(matrix))) || + any(is.null(rownames(matrix))) + ) stop('"matrix" should be a matrix with rownames/colnames.') + + events = as.events(x) + resolve = function(y){ + if(y %in% rownames(events)) paste(events[y,], collapse=' ') + else y + } + + colnames(matrix) = sapply(colnames(matrix), resolve) + rownames(matrix) = sapply(rownames(matrix), resolve) + return(matrix) +} + +nameToKey = function(x, name) +{ + is.compliant(x) + + types = as.types(x) + for(i in types) + { + if( nchar(name) > nchar(i) && + substr(name, 1, nchar(i)) == i ) + return( + rownames( + as.events(x, + genes = substr(name, nchar(i) + 2, nchar(name)), + types = substr(name, 1, nchar(i)))) + ) + } + + stop('"name" is not a key!') +} + + + +#' Check if logic node down +#' +#' @title is logical node down +#' @param node A node identifier +#' @return boolean +is.logic.node.down <- function(node) { + if(substr(node, start=1, stop=3) == 'OR_') + return(TRUE) + if(substr(node, start=1, stop=4) == 'XOR_') + return(TRUE) + if(substr(node, start=1, stop=4) == 'AND_') + return(TRUE) + if(substr(node, start=1, stop=4) == 'NOT_') + return(TRUE) + return(FALSE) +} + +#' Check if logic node up +#' +#' @title is logical node up +#' @param node A node identifier +#' @return boolean +is.logic.node.up <- function(node) { + if(substr(node, start=1, stop=2) == 'UP') + return(TRUE) + return(FALSE) +} + +#' Check if logic node down or up +#' +#' @title is logical node +#' @param node A node identifier +#' @return boolean +is.logic.node <- function(node) { + return(is.logic.node.up(node) || is.logic.node.down(node)) +} diff --git a/R/bootstrap.caprese.R b/R/bootstrap.caprese.R deleted file mode 100644 index f91c63df..00000000 --- a/R/bootstrap.caprese.R +++ /dev/null @@ -1,149 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -#perform non-parametric or parametric bootstrap to evalutate the confidence of the reconstruction -#INPUT: -#dataset: a dataset describing a progressive phenomenon -#lambda: shrinkage parameter (value in [0,1]) -#reconstructed.topology: previously reconstructed topology (before any bootstrap) -#command: should I perform non-parametric or parametric bootstrap? -#estimated.marginal.probabilities: estimated marginal probabilities of the events given the selected error rates -#estimated.conditional.probabilities: estimated conditional probabilities of the events given the selected error rates -#error.rates: selected error rates to be used if the bootstrap is "parametric" -#nboot: number of bootstrap resampling to be performed -#RETURN: -#bootstrap.statistics: statistics of the bootstrap -"bootstrap.caprese" <- -function(dataset, lambda, reconstructed.topology, command=c("non-parametric","parametric"), estimated.marginal.probabilities, estimated.conditional.probabilities, error.rates, nboot) { - #structure to save the statistics of the bootstrap - bootstrap.adj.matrix = array(0,c(ncol(dataset)+1,ncol(dataset)+1)); - colnames(bootstrap.adj.matrix) = c("None",colnames(dataset)); - rownames(bootstrap.adj.matrix) = c("None",colnames(dataset)); - #set the type of bootstrap to be performed, i.e., non-parametric or parametric - command <- match.arg(command); - #set the dataset if the bootstrap is parametric - if(command=="parametric") { - #define the possible samples given the current number of events - possible.strings = 2^ncol(dataset); - err = ""; - message = "Too many events! Parametric bootstrastap can not be performed." - err <- tryCatch(curr.dataset <- suppressWarnings(array(0,c(possible.strings,ncol(dataset)))), error = function(e) err <- message); - if(toString(err) == message) { - stop(err, call. = FALSE); - } - for (i in 1:possible.strings) { - curr.dataset[i,] = decimal.to.binary(i-1,ncol(dataset)); - } - colnames(curr.dataset) = colnames(dataset); - #define the samples distribution induced by the topology - samples.probabilities = estimate.tree.samples(curr.dataset,reconstructed.topology,estimated.marginal.probabilities,estimated.conditional.probabilities,error.rates); - } - #structure to save the results of the bootstrap - bootstrap.results = array(-1,c(nboot,ncol(dataset))); - colnames(bootstrap.results) = colnames(dataset); - #perform nboot bootstrap resampling - for (num in 1:nboot) { - #performed the bootstrapping procedure - if(command=="non-parametric") { - #perform the sampling for the current step of bootstrap - samples <- sample(1:nrow(dataset),size=nrow(dataset),replace=TRUE); - #perform the reconstruction on the bootstrapped dataset - check.data = check.dataset(dataset[samples,],FALSE); - } - else if(command=="parametric") { - #perform the sampling for the current step of bootstrap - samples <- sample(1:nrow(curr.dataset),size=nrow(dataset),replace=TRUE,prob=samples.probabilities); - #perform the reconstruction on the bootstrapped dataset - check.data = check.dataset(curr.dataset[samples,],FALSE); - } - #if the reconstruction was performed without errors - if(check.data$is.valid==TRUE) { - bootstrapped.dataset = check.data$dataset; - bootstrapped.topology = caprese.fit(bootstrapped.dataset,lambda,FALSE); - #set the reconstructed causal edges - parents.pos = array(-1,c(ncol(bootstrapped.topology$dataset),1)); - for(i in 1:ncol(bootstrapped.topology$dataset)) { - for(j in 1:ncol(bootstrapped.topology$dataset)) { - if(i!=j && bootstrapped.topology$adj.matrix[i,j]==1) { - parents.pos[j,1] = i; - } - } - } - #get the matched edge in the reconstruction - matched.idx = match(colnames(bootstrapped.topology$dataset),colnames(bootstrap.results)); - #if an event has no match, it means it has been merged and I discard it - parents.pos = parents.pos[!is.na(matched.idx)]; - matched.idx = matched.idx[!is.na(matched.idx)]; - #save the results - bootstrap.results[num,matched.idx] = parents.pos; - } - } - #set the statistics of the bootstrap - for(i in 1:ncol(bootstrap.adj.matrix)) { - for(j in 1:ncol(bootstrap.adj.matrix)) { - #if the edge is valid (no self cause) - if(i!=j) { - if(i==1 || j==1) { - if(j>1) { - curr.result = table(bootstrap.results[,j-1]); - curr.result = curr.result[names(curr.result)==-1]; - if(length(curr.result)>0) { - bootstrap.adj.matrix[i,j] = curr.result; - } - } - } - else { - curr.result = table(bootstrap.results[,j-1]); - curr.result = curr.result[names(curr.result)==(i-1)]; - if(length(curr.result)>0) { - bootstrap.adj.matrix[i,j] = curr.result; - } - } - } - } - } - #set the parent list of the topology previously reconstructed without any bootstrap - reconstructed.parents = ''; - for(i in 1:ncol(reconstructed.topology)) { - curr.parent = which(reconstructed.topology[,i]==1); - if(length(curr.parent)==0) { - curr.parent = -1; - } - reconstructed.parents = paste(reconstructed.parents,toString(curr.parent),sep=''); - } - #evalutate the overall confidence - overall.confidence = 0; - for(i in 1:nrow(bootstrap.results)) { - reconstructed.boot = ''; - for(j in 1:ncol(bootstrap.results)) { - reconstructed.boot = paste(reconstructed.boot,toString(bootstrap.results[i,j]),sep=''); - } - if(reconstructed.parents==reconstructed.boot) { - overall.confidence = overall.confidence + 1; - } - } - #save the edge confidence - edge.confidence = (reconstructed.topology*bootstrap.adj.matrix[-1,-1])/nboot; - #save the confidence from the bootstrap - confidence = list(overall.value=overall.confidence,overall.frequency=overall.confidence/nboot,bootstrap.values=bootstrap.adj.matrix[,-1],bootstrap.frequencies=bootstrap.adj.matrix[,-1]/nboot); - #save the settings of the bootstrap - bootstrap.settings = list(type=command,nboot=nboot); - #structure to save the results - bootstrap.statistics = list(reconstructed.topology=reconstructed.topology,confidence=confidence,edge.confidence=edge.confidence,bootstrap.settings=bootstrap.settings); - return(bootstrap.statistics); -} diff --git a/R/caprese.algorithm.R b/R/caprese.algorithm.R new file mode 100644 index 00000000..df519f02 --- /dev/null +++ b/R/caprese.algorithm.R @@ -0,0 +1,268 @@ +################################################################################## +# # +# TRONCO: a tool for TRanslational ONCOlogy # +# # +################################################################################## +# Copyright (c) 2015, Marco Antoniotti, Giulio Caravagna, Luca De Sano, # +# Alex Graudenzi, Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, # +# Giancarlo Mauri, Bud Mishra and Daniele Ramazzotti. # +# # +# All rights reserved. This program and the accompanying materials # +# are made available under the terms of the GNU GPL v3.0 # +# which accompanies this distribution # +# # +################################################################################## + + +#' reconstruct the best tree-like topology +#' @title caprese.fit +#' @param dataset a dataset describing a progressive phenomenon +#' @param lambda shrinkage parameter (value in [0,1]) +#' @param do.estimation should I perform the estimation of the error rates and probabilities? +#' @param silent execute the algorithm in silent mode +#' @return topology: the reconstructed tree-like topology +caprese.fit = function( dataset, lambda = 0.5 , do.estimation = FALSE, silent = FALSE ) { + + # start the clock to measure the execution time + ptm <- proc.time(); + + # structure with the set of valid edges + # I start from the complete graph, i.e., I have no prior and all the connections are possibly causal + adj.matrix = array(1,c(ncol(dataset),ncol(dataset))); + colnames(adj.matrix) = colnames(dataset); + rownames(adj.matrix) = colnames(dataset); + + # the diagonal of the adjacency matrix should not be considered, i.e., no self cause is allowed + diag(adj.matrix) = 0; + + # check if the dataset is valid + valid.dataset = check.dataset(dataset,adj.matrix,FALSE); + adj.matrix = valid.dataset$adj.matrix; + + # marginal.probs is an array of the observed marginal probabilities + marginal.probs <- valid.dataset$marginal.probs; + # joint.probs is an array of the observed joint probabilities + joint.probs <- valid.dataset$joint.probs; + + # reconstruct the causal topology + best.parents = get.tree.parents(adj.matrix,marginal.probs,joint.probs,lambda); + + # create the structures where to save the results + parents.pos <- best.parents$parents; + conditional.probs <- array(-1, dim=c(length(parents.pos),1)); + adj.matrix <- array(0, dim=c(length(parents.pos),length(parents.pos))); + colnames(adj.matrix) = colnames(dataset); + rownames(adj.matrix) = colnames(dataset); + confidence <- array(list(), c(3,1)); + confidence[[1,1]] = array(0, dim=c(length(parents.pos),length(parents.pos))); + confidence[[2,1]] = best.parents$pr.score; + confidence[[3,1]] = array(0, dim=c(length(parents.pos),length(parents.pos))); + + # set the parents names and the structures + hypergeometric.pvalues = vector(); + for(i in 1:ncol(dataset)) { + # if the node has a parent + if(parents.pos[i,1]!=-1) { + # Note: [i,j] = 1 means that i is causing j + adj.matrix[parents.pos[i,1],i] = 1; + # compute the conditional probability of P(CHILD=1|PARENT=1) + conditional.probs[i,1] = best.parents$joint.probs[parents.pos[i,1],i]/best.parents$marginal.probs[parents.pos[i]]; + } + # if the node has no parent, its conditional probability is set to + else { + conditional.probs[i,1] = 1; + } + # compute the hypergeometric test + for (j in i:ncol(dataset)) { + if(i!=j) { + + # confidence in temporal priority + confidence[[1,1]][i,j] = min(best.parents$marginal.probs[i]/best.parents$marginal.probs[j],1); + confidence[[1,1]][j,i] = min(best.parents$marginal.probs[j]/best.parents$marginal.probs[i],1); + + # compute the confidence by hypergeometric test + confidence[[3,1]][i,j] = phyper(best.parents$joint.probs[i,j]*nrow(dataset),best.parents$marginal.probs[i]*nrow(dataset),nrow(dataset)-best.parents$marginal.probs[i]*nrow(dataset),best.parents$marginal.probs[j]*nrow(dataset),lower.tail=FALSE); + confidence[[3,1]][j,i] = confidence[[3,1]][i,j]; + # save all the valid pvalues + hypergeometric.pvalues = append(hypergeometric.pvalues,confidence[[2,1]][i,j]); + + } + } + } + + if(do.estimation) { + #estimate the error rates and, given them, the probabilities + estimated.error.rates = estimate.tree.error.rates(best.parents$marginal.probs,best.parents$joint.probs,parents.pos); + estimated.probabilities = estimate.tree.probs(best.parents$marginal.probs,best.parents$joint.probs,parents.pos,estimated.error.rates); + } + else { + estimated.error.rates = list(error.fp=NA,error.fn=NA); + estimated.probabilities = list(marginal.probs=NA,joint.probs=NA,conditional.probs=NA); + } + error.rates = estimated.error.rates; + estimated.probabilities.fit = estimated.probabilities; + + #structures where to save the results + model = list(); + adj.matrix.fit = list(); + adj.matrix.fit$adj.matrix.fit = adj.matrix; + probabilities.observed = list(marginal.probs=marginal.probs,joint.probs=joint.probs,conditional.probs=conditional.probs); + probabilities.fit = list(estimated.marginal.probs=estimated.probabilities.fit$marginal.probs,estimated.joint.probs=estimated.probabilities.fit$joint.probs,estimated.conditional.probs=estimated.probabilities.fit$conditional.probs); + probabilities = list(probabilities.observed=probabilities.observed,probabilities.fit=probabilities.fit); + + # save the results for the model + model[["caprese"]] = list(probabilities=probabilities,parents.pos=parents.pos,error.rates=error.rates,adj.matrix=adj.matrix.fit); + + # set the execution parameters + parameters = list(algorithm="CAPRESE",lambda=lambda,do.estimation=do.estimation,silent=silent); + + #return the results + topology = list(dataset=dataset,confidence=confidence,model=model,parameters=parameters,execution.time=(proc.time()-ptm)); + return(topology); + +} + +#' select at the most one parent for each node based on the probability raising criteria +#' @title get.tree.parents +#' @param adj.matrix adjacency matrix of the valid edges +#' @param marginal.probs observed marginal probabilities +#' @param joint.probs observed joint probabilities +#' @param lambda shrinkage parameter (value between 0 and 1) +#' @return best.parents list of the best parents +get.tree.parents = function( adj.matrix, marginal.probs, joint.probs, lambda ) { + + # compute the scores for each edge + scores = get.tree.scores(adj.matrix,marginal.probs,joint.probs,lambda); + pr.score = scores$pr.score; + # set to -1 the scores where there is no causation according to Suppes' condition + # [i,j] means i is causing j + for (i in 1:ncol(pr.score)) { + for (j in i:ncol(pr.score)) { + # the diagonal has not to be considered (no self-cause) + if(i==j) { + pr.score[i,j] = -1; + } + # otherwise, apply Suppes's criteria for prima facie cause + else { + # if both the scores are not greater then 0, they are not valid + # in this case the events are causally irrelevant, i.e., independent + if(pr.score[i,j]<=0 && pr.score[j,i]<=0) { + pr.score[i,j] = -1; + pr.score[j,i] = -1; + } + # if at least one score is greater then 0, I keep the greater one + # in this way I give a (time) direction to the progression + # furthermore, this constrain the topology to be acyclic by construction + else { + if(pr.score[i,j]>pr.score[j,i]) { + pr.score[j,i] = -1; + } + else { + pr.score[i,j] = -1; + } + } + } + } + } + + # chose at the most one parent per node + # here I suppose that each node has a parent + # spurious causes are considered (and removed) later + best.parents = array(-1, dim=c(ncol(pr.score),1)); + for (i in 1:ncol(pr.score)) { + # -1 means that the best parent is the Root + curr.best = -1; + # find the best parent for the current node + best = which.max(pr.score[,i]); + if(pr.score[best,i]>0) { + curr.best = best; + } + # set the best parent for the current node + best.parents[i,1] = curr.best; + } + + # check for spurious causes by the independent progression filter and complete the parents list + parents = verify.parents(best.parents,marginal.probs,joint.probs); + + # save the results + best.parents = list(parents=parents,marginal.probs=marginal.probs,joint.probs=joint.probs,pr.score=scores$pr.score); + return(best.parents); + +} + +#' compute the probability raising based scores +#' @title get.tree.scores +#' @param adj.matrix adjacency matrix of the valid edges +#' @param marginal.probs observed marginal probabilities +#' @param joint.probs observed joint probabilities +#' @param lambda shrinkage parameter (value between 0 and 1) +#' @return scores: probability raising based scores +get.tree.scores = function( adj.matrix, marginal.probs, joint.probs, lambda ) { + + #structure where to save the probability raising scores + pr.score = array(-1, dim=c(nrow(marginal.probs),nrow(marginal.probs))); + + # compute the probability raising based scores + for (i in 1:ncol(pr.score)) { + for (j in 1:ncol(pr.score)) { + + # if the edge is valid + if(adj.matrix[i,j]==1) { + + # alpha is the probability raising model of causation (raw model estimate) + alpha = ((joint.probs[i,j]/marginal.probs[i])-((marginal.probs[j]-joint.probs[i,j])/(1-marginal.probs[i])))/((joint.probs[i,j]/marginal.probs[i])+((marginal.probs[j]-joint.probs[i,j])/(1-marginal.probs[i]))); + + # beta is the correction factor (based on time distance in terms of statistical dependence) + beta = (joint.probs[i,j]-marginal.probs[i]*marginal.probs[j])/(joint.probs[i,j]+marginal.probs[i]*marginal.probs[j]); + + # the overall estimator is a shrinkage-like combination of alpha and beta + # the scores are saved in the convention used for an ajacency matrix, i.e. [i,j] means causal edge i-->j + pr.score[i,j] = (1-lambda)*alpha + lambda*beta; + + } + + } + } + scores = list(marginal.probs=marginal.probs,joint.probs=joint.probs,pr.score=pr.score); + return(scores); + +} + +#' verify the independent progression filter +#' @title verify.parents +#' @param best.parents best edges to be verified +#' @param marginal.probs observed marginal probabilities +#' @param joint.probs observed joint probabilities +#' @return best.parents: list of the best valid parents +verify.parents = function( best.parents, marginal.probs, joint.probs ) { + + # verify the condition for the best parent of each node + for (i in 1:length(best.parents)) { + # if there is a connection, i.e. the node is not already attached to the Root + if(best.parents[i]!=-1) { + # score for the root as the parent of this node + w.root.node = 1/(1+marginal.probs[i]); + # compute the scores for the edges to all the other upstream nodes + attach.to.root = 1; + for (j in 1:length(marginal.probs)) { + # if the connection is valid and the parent node has greater probability + # i.e. it is before the child in temporal order + if(i!=j && marginal.probs[j]>marginal.probs[i]) { + w.parent.node = (marginal.probs[j]/(marginal.probs[i]+marginal.probs[j]))*(joint.probs[i,j]/(marginal.probs[i]*marginal.probs[j])); + # the parent is valid if this condition is valid at least one time (i.e. for at least one of the upstream nodes) + # meaning that if we find out that a connection is not spurious for any node, the best parent is not spurious as well + if(w.root.node<=w.parent.node) { + attach.to.root = 0; + break; + } + } + } + # connect the node to the Root if the flag is true + if(attach.to.root==1) { + best.parents[i] = -1; + } + } + } + return(best.parents); + +} diff --git a/R/caprese.bootstrap.R b/R/caprese.bootstrap.R new file mode 100644 index 00000000..adb5ccf4 --- /dev/null +++ b/R/caprese.bootstrap.R @@ -0,0 +1,357 @@ +################################################################################## +# # +# TRONCO: a tool for TRanslational ONCOlogy # +# # +################################################################################## +# Copyright (c) 2015, Marco Antoniotti, Giulio Caravagna, Luca De Sano, # +# Alex Graudenzi, Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, # +# Giancarlo Mauri, Bud Mishra and Daniele Ramazzotti. # +# # +# All rights reserved. This program and the accompanying materials # +# are made available under the terms of the GNU GPL v3.0 # +# which accompanies this distribution # +# # +################################################################################## + + +#' perform non-parametric or parametric bootstrap to evalutate the confidence of the reconstruction +#' @title bootstrap.caprese +#' @param dataset a dataset describing a progressive phenomenon +#' @param lambda shrinkage parameter (value in [0,1]) +#' @param do.estimation should I perform the estimation of the error rates and probabilities? +#' @param silent should I be verbose? +#' @param reconstruction todo +#' @param command type of search for the likelihood fit, either hill climbing (hc) or tabu (tabu) +#' @param nboot number of bootstrap resampling to be performed +#' @param bootstrap.statistics todo +#' @return bootstrap.statistics: statistics of the bootstrap +bootstrap.caprese = function(dataset, + lambda, + do.estimation, + silent, + reconstruction, + command = "non-parametric", + nboot = 100, + bootstrap.statistics = list()) +{ + + # start the clock to measure the execution time + ptm <- proc.time(); + + cores = detectCores() + if(cores < 1) { + cores = 1 + } + + expected.execution.time = round(((reconstruction$execution.time[3]*nboot)/(cores)),digits=0) + cat("Expected completion in approx.",format(.POSIXct(expected.execution.time,tz="GMT"),"%Hh:%Mm:%Ss"),"\n") + + # structure to save the results of the bootstrap + curr.bootstrap.results = array(list(-1), c(nboot,nevents(reconstruction))) + colnames(curr.bootstrap.results) = rownames(as.events(reconstruction)) + bootstrap.results = list() + bootstrap.results[names(as.models(reconstruction))] = list(curr.bootstrap.results) + + curr.bootstrap.adj.matrix = array(list(0), c(nevents(reconstruction)+1,nevents(reconstruction)+1)) + colnames(curr.bootstrap.adj.matrix) = c("None",rownames(as.events(reconstruction))) + rownames(curr.bootstrap.adj.matrix) = c("None",rownames(as.events(reconstruction))) + bootstrap.adj.matrix = list() + bootstrap.adj.matrix[names(as.models(reconstruction))] = list(curr.bootstrap.adj.matrix) + + bootstrap.adj.matrix.frequency = list() + bootstrap.adj.matrix.frequency[names(as.models(reconstruction))] = list(curr.bootstrap.adj.matrix) + + curr.edge.confidence = array(list(0), c(nevents(reconstruction),nevents(reconstruction))) + colnames(curr.edge.confidence) = rownames(as.events(reconstruction)) + rownames(curr.edge.confidence) = rownames(as.events(reconstruction)) + bootstrap.edge.confidence = list() + bootstrap.edge.confidence[names(as.models(reconstruction))] = list(curr.edge.confidence) + + overall.confidence = list(); + overall.confidence[names(as.models(reconstruction))] = list(0); + overall.frequency = list(); + overall.frequency[names(as.models(reconstruction))] = list(0); + + # reset the seed + set.seed(NULL) + + # create a progress bar + flush.console() + pb <- txtProgressBar(1, nboot, style = 3) + + # perform nboot bootstrap resampling + for (num in 1:nboot) { + + # update the progress bar + setTxtProgressBar(pb, num) + + # performed the bootstrapping procedure + if(command == "non-parametric") { + + # perform the sampling for the current step of bootstrap + samples = sample(1:nrow(dataset), size = nrow(dataset), replace = TRUE) + bootstrapped.dataset = dataset[samples,] + + curr.reconstruction = list() + curr.reconstruction$genotypes = bootstrapped.dataset; + curr.reconstruction$annotations = reconstruction$annotations; + curr.reconstruction$types = reconstruction$types; + curr.reconstruction$hypotheses = reconstruction$hypotheses; + + # perform the reconstruction on the bootstrapped dataset + bootstrapped.topology = tronco.caprese(curr.reconstruction, + lambda, + do.estimation, + silent) + + curr.reconstruction = bootstrapped.topology; + + } else if(command=="parametric") { + + if(num == 1) { + + # structure to save the samples probabilities + samples.probabilities = list(); + + # define the possible samples given the current number of events + possible.strings = 2 ^ ncol(dataset) + + err = "" + message = "Too many events in the dataset! Parametric bootstrastap can not be performed." + err = tryCatch({ + curr.dataset = suppressWarnings(array(0, c(possible.strings, ncol(dataset)))) + }, error = function(e) { + err <- message + }) + + + if(toString(err) == message) { + stop(err, call. = FALSE) + } + + for (i in 1:possible.strings) { + curr.dataset[i, ] = decimal.to.binary.dag(i - 1, ncol(dataset)) + } + + colnames(curr.dataset) = colnames(dataset) + + for (m in names(as.models(reconstruction))) { + + # estimate the samples probabilities for each model + samples.probabilities[m] = list(estimate.dag.samples(curr.dataset, + as.adj.matrix(reconstruction,model=m)[[m]], + as.marginal.probs(reconstruction,model=m,type="fit")[[m]], + as.conditional.probs(reconstruction,model=m,type="fit")[[m]], + as.parents.pos(reconstruction,model=m)[[m]], + as.error.rates(reconstruction,model=m)[[m]])) + + } + + } + + # perform the reconstruction for each model + new.reconstruction = reconstruction; + new.reconstruction$model = list(); + for (m in names(as.models(reconstruction))) { + + # perform the sampling for the current step of bootstrap and regularizator + samples = sample(1:nrow(curr.dataset), + size = nrow(dataset), + replace = TRUE, + prob = samples.probabilities[[m]]) + bootstrapped.dataset = curr.dataset[samples,] + + curr.reconstruction = list() + curr.reconstruction$genotypes = bootstrapped.dataset; + curr.reconstruction$annotations = reconstruction$annotations; + curr.reconstruction$types = reconstruction$types; + curr.reconstruction$hypotheses = reconstruction$hypotheses; + + # perform the reconstruction on the bootstrapped dataset + bootstrapped.topology = tronco.caprese(curr.reconstruction, + lambda, + do.estimation, + silent) + + # save the results for this model + new.reconstruction$model[m] = as.models(bootstrapped.topology,models=m) + + } + curr.reconstruction = new.reconstruction; + + } + + # set the reconstructed selective advantage edges + for (m in names(as.models(curr.reconstruction))) { + + # get the parents pos + parents.pos = array(list(), c(nevents(curr.reconstruction), 1)) + + + curr.adj.matrix = as.adj.matrix(curr.reconstruction,model=m)[[m]] + for(i in 1:nevents(curr.reconstruction)) { + for(j in 1:nevents(curr.reconstruction)) { + if(i!=j && curr.adj.matrix[i,j]==1) { + parents.pos[j, 1] = list(c(unlist(parents.pos[j,1]),i)) + } + } + } + + parents.pos[unlist(lapply(parents.pos,is.null))] = list(-1) + + # save the results + bootstrap.results[[m]][num,] = parents.pos; + + } + + } + + # close progress bar + close(pb) + + # set the statistics of the bootstrap + for (m in names(as.models(reconstruction))) { + curr.bootstrap.adj.matrix = bootstrap.adj.matrix[[m]] + for(i in 2:ncol(curr.bootstrap.adj.matrix)) { + curr.result = bootstrap.results[[m]][,i-1] + for(j in 1:length(curr.result)) { + curr.val = curr.result[[j]] + for(k in 1:length(curr.val)) { + if(length(curr.val[k])==1 && curr.val[k] == -1) { + curr.bootstrap.adj.matrix[[1,i]] = curr.bootstrap.adj.matrix[[1,i]] + 1 + } else { + curr.bootstrap.adj.matrix[[curr.val[k] + 1, i]] = curr.bootstrap.adj.matrix[[curr.val[k] + 1, i]] + 1 + } + } + } + } + + bootstrap.adj.matrix[[m]] = curr.bootstrap.adj.matrix; + rownames(bootstrap.results[[m]]) = paste("Iteration ",1:nrow(bootstrap.results[[m]]),sep="") + + } + + # evalutate the overall confidence + for (m in names(as.models(reconstruction))) { + curr.bootstrap.results = bootstrap.results[[m]] + for(i in 1:nrow(curr.bootstrap.results)) { + curr.adj.matrix = array(0, c(nevents(reconstruction),nevents(reconstruction))) + for(j in 1:ncol(curr.bootstrap.results)) { + curr.result = curr.bootstrap.results[i, j] + for(k in 1:length(curr.result)) { + curr.val = curr.result[[k]] + for(l in 1:length(curr.val)) { + if(length(curr.val[l])>1 || curr.val[l] != -1) { + curr.adj.matrix[curr.val[l], j] = 1 + } + } + } + } + + # if I have a perfect match between the reconstructed topologies, increase the count + reconstructed.topology = as.adj.matrix(reconstruction,model=m)[[m]] + flag = TRUE; + for (j in 1:nrow(reconstructed.topology)) { + for (k in 1:ncol(reconstructed.topology)) { + if(reconstructed.topology[j,k]!=curr.adj.matrix[j,k]) { + flag = FALSE; + next(); + } + } + } + if(flag==TRUE) { + overall.confidence[[m]] = overall.confidence[[m]] + 1 + overall.frequency[[m]] = overall.confidence[[m]] / nboot + } + } + } + + # save the edge confidence and the frequency of the bootstrap adj.matrix + for (m in names(as.models(reconstruction))) { + + curr.adj.matrix = as.adj.matrix(reconstruction,model=m)[[m]]; + + # save the edge confidence + curr.bootstrap.matrix = bootstrap.adj.matrix[[m]][-1,-1]; + curr.edge.confidence = array(0,c(ncol(curr.bootstrap.matrix),nrow(curr.bootstrap.matrix))) + colnames(curr.edge.confidence) = colnames(curr.bootstrap.matrix); + rownames(curr.edge.confidence) = rownames(curr.bootstrap.matrix); + for (i in 1:ncol(curr.bootstrap.matrix)) { + for (j in 1:nrow(curr.bootstrap.matrix)) { + curr.edge.confidence[i,j] = (curr.adj.matrix[i,j]*as.numeric(curr.bootstrap.matrix[i,j]))/nboot + } + } + bootstrap.edge.confidence[[m]] = curr.edge.confidence + + # save the frequency of the bootstrap adj.matrix + curr.bootstrap.matrix = bootstrap.adj.matrix[[m]]; + curr.adj.matrix.frequency = array(0,c(ncol(curr.bootstrap.matrix),nrow(curr.bootstrap.matrix))) + colnames(curr.adj.matrix.frequency) = colnames(curr.bootstrap.matrix); + rownames(curr.adj.matrix.frequency) = rownames(curr.bootstrap.matrix); + for (i in 1:ncol(curr.bootstrap.matrix)) { + for (j in 1:nrow(curr.bootstrap.matrix)) { + curr.adj.matrix.frequency[i,j] = as.numeric(as.numeric(curr.bootstrap.matrix[i,j]))/nboot + } + } + bootstrap.adj.matrix.frequency[[m]] = curr.adj.matrix.frequency + + } + + # save the statistics of the bootstrap + for (m in names(as.models(reconstruction))) { + if(command == "non-parametric") { + bootstrap.statistics[[m]]$npb$bootstrap.results = bootstrap.results[[m]] + bootstrap.statistics[[m]]$npb$bootstrap.adj.matrix = list(count = bootstrap.adj.matrix[[m]], frequency = bootstrap.adj.matrix.frequency[[m]]) + bootstrap.statistics[[m]]$npb$bootstrap.edge.confidence = bootstrap.edge.confidence[[m]] + bootstrap.statistics[[m]]$npb$overall.confidence = list(count = overall.confidence[[m]], frequency = overall.frequency[[m]]) + bootstrap.statistics[[m]]$npb$bootstrap.settings = list(type = command, nboot = nboot) + } + else if(command == "parametric") { + bootstrap.statistics[[m]]$pb$bootstrap.results = bootstrap.results[[m]] + bootstrap.statistics[[m]]$pb$bootstrap.adj.matrix = list(count = bootstrap.adj.matrix[[m]], frequency = bootstrap.adj.matrix.frequency[[m]]) + bootstrap.statistics[[m]]$pb$bootstrap.edge.confidence = bootstrap.edge.confidence[[m]] + bootstrap.statistics[[m]]$pb$overall.confidence = list(count = overall.confidence[[m]], frequency = overall.frequency[[m]]) + bootstrap.statistics[[m]]$pb$bootstrap.settings = list(type = command, nboot = nboot) + } + else if(command == "statistical") { + bootstrap.statistics[[m]]$sb$bootstrap.results = bootstrap.results[[m]] + bootstrap.statistics[[m]]$sb$bootstrap.adj.matrix = list(count = bootstrap.adj.matrix[[m]], frequency = bootstrap.adj.matrix.frequency[[m]]) + bootstrap.statistics[[m]]$sb$bootstrap.edge.confidence = bootstrap.edge.confidence[[m]] + bootstrap.statistics[[m]]$sb$overall.confidence = list(count = overall.confidence[[m]], frequency = overall.frequency[[m]]) + bootstrap.statistics[[m]]$sb$bootstrap.settings = list(type = command, nboot = nboot) + } + } + + # save the execution time of the bootstrap + if(command == "non-parametric") { + bootstrap.statistics$npb$execution.time=(proc.time()-ptm) + } + else if(command == "parametric") { + bootstrap.statistics$pb$execution.time=(proc.time()-ptm) + } + else if(command == "statistical") { + bootstrap.statistics$sb$execution.time=(proc.time()-ptm) + } + + return(bootstrap.statistics) +} + +#' convert an integer decimal number to binary +#' @title decimal.to.binary.tree +#' @param num.decimal decimal integer to be converted +#' @param num.bits number of bits to be used +#' @return num.binary: binary conversion of num.decimal +decimal.to.binary.tree = function(num.decimal, num.bits) { + #structure where to save the result + num.binary = rep(0,num.bits); + #convert the integer decimal number to binary + pos = 0; + while(num.decimal>0) { + #compute the value of the current step + num.binary[num.bits-pos] = num.decimal %% 2; + #divide the number by 2 for the next iteration + num.decimal = num.decimal %/% 2; + pos = pos + 1; + } + return(num.binary); +} diff --git a/R/caprese.estimation.R b/R/caprese.estimation.R new file mode 100644 index 00000000..370d3453 --- /dev/null +++ b/R/caprese.estimation.R @@ -0,0 +1,454 @@ +################################################################################## +# # +# TRONCO: a tool for TRanslational ONCOlogy # +# # +################################################################################## +# Copyright (c) 2015, Marco Antoniotti, Giulio Caravagna, Luca De Sano, # +# Alex Graudenzi, Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, # +# Giancarlo Mauri, Bud Mishra and Daniele Ramazzotti. # +# # +# All rights reserved. This program and the accompanying materials # +# are made available under the terms of the GNU GPL v3.0 # +# which accompanies this distribution # +# # +################################################################################## + + +#' estimate the error rates by "L-BFGS-B" optimization in terms of L2-error +#' @title estimate.tree.error.rates +#' @param marginal.probs marginal probabilities +#' @param joint.probs joint probabilities +#' @param parents.pos which event is the parent? 0 if none, a number otherwise +#' @return estimated.error.rates: estimated probabilities, false positive and false negative error rates +estimate.tree.error.rates = function(marginal.probs, + joint.probs, + parents.pos) +{ + + # function to be optimized by "L-BFGS-B" optimization in terms of L2-error + f.estimation <- function(errors) { + # set the current error rates with the starting point of the optimization being (e_pos,e_neg) = 0 + error.rates = list(error.fp=errors[1], error.fn=errors[2]) + # estimate the observed probabilities given the error rates + estimated.probs = estimate.tree.probs(marginal.probs, joint.probs, parents.pos,error.rates) + # evaluate the goodness of the estimatione by L2-error on the estimated marginal and joint probabilities + error.estimation = sum((marginal.probs - estimated.probs$marginal.probs)^2) + sum((joint.probs-estimated.probs$joint.probs)^2) + return(error.estimation); + } + + # the estimation is performed as in Byrd et al (1995) + # this method allows for box constraints, i.e., each variable can be given a lower and/or upper bound + estimated.error.rates = optim(c(0.00, 0.00), + f.estimation, + method="L-BFGS-B", + lower=c(0.00, 0.00), + upper=c(0.49, 0.49))$par + + # structure to save the results + estimated.error.rates = list(error.fp=estimated.error.rates[1], error.fn=estimated.error.rates[2]) + return(estimated.error.rates) + +} + +#' estimate the theoretical joint probability of two given nodes given the reconstructed topology +#' @title estimate.tree.joint.probs +#' @param first.node first node +#' @param second.node second node +#' @param parents.pos which event is the parent? -1 if none, a number otherwise +#' @param marginal.probs marginal probabilities +#' @param conditional.probs conditional probabilities +#' @return estimated.tree.joint.probs: estimated theoretical joint probability +estimate.tree.joint.probs = function(first.node, + second.node, + parents.pos, + marginal.probs, + conditional.probs) +{ + + # if the two nodes are roots + if(parents.pos[first.node] == -1 && parents.pos[second.node] == -1) { + estimated.tree.joint.probs = marginal.probs[first.node, 1] * marginal.probs[second.node, 1] + } else if(first.node == second.node) { + # if the two nodes are the same node + estimated.tree.joint.probs = marginal.probs[first.node, 1] + } else { + # otherwise + # go through the parents starting from the two nodes to find if they are directly connected + # are the two nodes in the same path? + is.path = 0; + # check if first.node is an ancestor of second.node + curr.first = first.node + curr.second = second.node + while(parents.pos[curr.second] != -1) { + if(curr.first == curr.second) { + is.path = 1 + is.child = second.node + break + } + curr.second = parents.pos[curr.second] + } + if(is.path == 0) { + # check if second.node is an ancestor of first.node + curr.first = first.node; + curr.second = second.node; + while(parents.pos[curr.first] != -1) { + if(curr.first == curr.second) { + is.path = 1 + is.child = first.node + break + } + curr.first = parents.pos[curr.first] + } + } + # check if the two nodes are at least connected + # are the two nodes connected? + is.connected = 0 + if(is.path == 0) { + curr.first = first.node + curr.second = second.node + while(parents.pos[curr.first] != -1) { + while(parents.pos[curr.second] != -1) { + if(curr.first == curr.second) { + is.connected = 1 + is.ancestor = curr.first + break + } else { + curr.second = parents.pos[curr.second] + } + } + if(is.connected == 0) { + curr.first = parents.pos[curr.first] + curr.second = second.node + } else { + break; + } + } + } + # now I can set the joint probabilities + # in this case the two nodes are directly connected + # P(child,parent)_estimate = P(child); + if(is.path == 1) { + estimated.tree.joint.probs = marginal.probs[is.child, 1] + } else if(is.connected == 1) { + # in this case the two nodes are indirectly connected + # P(i,j)_estimate = P(ancestor)_estimate * P_PATH(ancestor->first.node)_estimate * P_PATH(ancestor->second.node)_estimate + # P(ancestor)_estimate + estimated.tree.joint.probs = marginal.probs[is.ancestor, 1] + # P_PATH(ancestor->first.node)_estimate + first.path = 1 + curr.first = first.node + while(parents.pos[curr.first] != is.ancestor) { + first.path = first.path * conditional.probs[curr.first, 1] + curr.first = parents.pos[curr.first] + } + second.path = 1 + curr.second = second.node + while(parents.pos[curr.second] != is.ancestor) { + second.path = second.path * conditional.probs[curr.second,1] + curr.second = parents.pos[curr.second] + } + estimated.tree.joint.probs = estimated.tree.joint.probs * first.path * second.path + } else { + # in this case the two nodes are not connected + # P(i,j)_estimate = P(i)_estimate * P(j)_estimate + estimated.tree.joint.probs = marginal.probs[first.node, 1] * marginal.probs[second.node, 1] + } + } + return(estimated.tree.joint.probs) + +} + +#' estimate the marginal, joint and conditional probabilities given the reconstructed topology and the error rates +#' @title estimate.tree.probs +#' @param marginal.probs observed marginal probabilities +#' @param joint.probs observed joint probabilities +#' @param parents.pos position of the parents in the list of nodes +#' @param error.rates rates for the false positive and the false negative errors +#' @return estimated.probs estimated marginal, joint and conditional probabilities +estimate.tree.probs = function(marginal.probs, + joint.probs, + parents.pos, + error.rates) { + + # structure where to save the probabilities to be estimated + estimated.marginal.probs = array(-1, dim=c(nrow(marginal.probs), 1)) + estimated.joint.probs = array(-1, dim=c(nrow(marginal.probs), nrow(marginal.probs))) + estimated.conditional.probs = array(-1, dim=c(nrow(marginal.probs), 1)) + + # estimate the theoretical conditional probabilities given the error rates + # this estimation is performed by applying the error rates to the marginal and joint probabilities + theoretical.conditional.probs = array(-1, dim=c(nrow(marginal.probs),1)) + for (i in 1:nrow(theoretical.conditional.probs)) { + # if the node has a parent, use the error rates to compute the conditional probability + # if the node has no parent, its conditional probability is not considered + if(parents.pos[i,1] != -1) { + # P(i|j)_theoretical = ((P(i,j)_obs-e_p*(P(j)_obs+P(i)_obs)+e_p^2)/(1-e_n-e_p)^2)/((P(j)_obs-e_p)/(1-e_n-e_p)) + theoretical.conditional.probs[i,1] = (joint.probs[i, parents.pos[i,1]] - error.rates$error.fp * (marginal.probs[parents.pos[i,1],1]+marginal.probs[i,1])+error.rates$error.fp^2)/((marginal.probs[parents.pos[i,1],1]-error.rates$error.fp)*(1-error.rates$error.fn-error.rates$error.fp)); + if(theoretical.conditional.probs[i,1] < 0 || theoretical.conditional.probs[i,1]>1) { + # invalid theoretical conditional probability + if(theoretical.conditional.probs[i,1] < 0) { + theoretical.conditional.probs[i,1] = 0 + } else { + theoretical.conditional.probs[i,1] = 1 + } + } + } + } + + # estimate the marginal observed probabilities + # this estimation is performed by applying the topological constraints on the probabilities and then the error rates + # I do not have any constraint on the nodes without a parent + child.list = which(parents.pos == -1) + estimated.marginal.probs[child.list, 1] = marginal.probs[child.list, 1] + estimated.marginal.probs.with.error = array(-1, dim=c(nrow(marginal.probs), 1)) + estimated.marginal.probs.with.error[child.list, 1] = estimated.marginal.probs[child.list, 1] + visited = length(child.list) + # I do not have any constraint for the joint probabilities on the pair of nodes which are the roots of the tree/forest + estimated.joint = array(0, dim=c(nrow(marginal.probs), nrow(marginal.probs))) + for (i in child.list) { + for (j in child.list) { + if(i!=j) { + estimated.joint.probs[i, j] = joint.probs[i, j] + estimated.joint[i,j] = -1 + } + } + } + # visit the nodes with a parent in topological order + while (visited < nrow(estimated.marginal.probs)) { + # set the new child list + new.child = vector() + # go through the current parents + for (node in child.list) { + # set the new children + curr.child <- which(parents.pos == node) + # go through the current children + for (child in curr.child) { + # set the marginal probability for this node + # P(child)_estimate = P(parent)_estimate * P(child|parent)_theoretical + estimated.marginal.probs[child,1] = estimated.marginal.probs[parents.pos[child, 1], 1] * theoretical.conditional.probs[child,1]; + visited = visited + 1 + # P(child,parent)_estimare = P(child)_estimate; + estimated.joint.probs[child,parents.pos[child, 1]] = estimated.marginal.probs[child, 1] + estimated.joint[child,parents.pos[child, 1]] = 1 + estimated.joint.probs[parents.pos[child, 1], child] = estimated.marginal.probs[child, 1] + estimated.joint[parents.pos[child, 1], child] = 1 + # apply the error rates to the marginal probabilities + # P(i)_obs_estimate = P(i)_estimate*(1-e_n) + P(not i)_estimate*e_p + estimated.marginal.probs.with.error[child, 1] = error.rates$error.fp + (1 - error.rates$error.fn-error.rates$error.fp) * estimated.marginal.probs[child, 1] + if(estimated.marginal.probs.with.error[child, 1] < 0 || estimated.marginal.probs.with.error[child, 1] > 1) { + # invalid estimated observed probability + if(estimated.marginal.probs.with.error[child, 1] < 0) { + estimated.marginal.probs.with.error[child, 1] = 0 + } else { + estimated.marginal.probs.with.error[child, 1] = 1 + } + } + } + new.child = c(new.child,curr.child) + } + # set the next child list + child.list = new.child + } + diag(estimated.joint.probs) = estimated.marginal.probs + diag(estimated.joint) = -1 + + # given the estimated observed probabilities, I can now also estimate the joint probabilities by applying the topological constraints and then the error rates + for (i in 1:nrow(estimated.joint.probs)) { + for (j in i:nrow(estimated.joint.probs)) { + # if I still need to estimate this joint probability + if(estimated.joint[i,j] == 0) { + estimated.joint.probs[i, j] = estimate.tree.joint.probs(i, + j, + parents.pos, + estimated.marginal.probs, + theoretical.conditional.probs) + estimated.joint[i, j] = 1 + } + # now I can apply the error rates to estimate the observed joint probabilities + if(estimated.joint[i,j] == 1) { + # P(i,j)_obs_estimate = P(i,j)_estimate*(1-e_n)^2+P(not i,j)_estimate*e_p*(1-e_n)+P(i,not j)_estimate*(1-e_n)*e_p+P(not i,not j)_estimate*e_p^2; + estimated.joint.probs[i,j] = estimated.joint.probs[i, j] * ((1-error.rates$error.fn - error.rates$error.fp)^2) + error.rates$error.fp * (estimated.marginal.probs[i, 1] + estimated.marginal.probs[j, 1]) - error.rates$error.fp^2 + # invalid estimated joint probability + if(estimated.joint.probs[i,j] < 0 || estimated.joint.probs[i, j] > min(estimated.marginal.probs.with.error[i, 1], estimated.marginal.probs.with.error[j, 1])) { + if(estimated.joint.probs[i,j] < 0) { + estimated.joint.probs[i,j] = 0 + } else { + estimated.joint.probs[i, j] = min(estimated.marginal.probs.with.error[i, 1], estimated.marginal.probs.with.error[j, 1]) + } + } + estimated.joint.probs[j, i] = estimated.joint.probs[i,j] + } + } + } + + # save the estimated probabilities + estimated.marginal.probs = estimated.marginal.probs.with.error + # given the estimated observed and joint probabilities, I can finally compute the conditional probabilities + # P(child|parent)_obs_estimate = P(parent,child)_obs_estimate/P(parent)_obs_estimate + for (i in 1:nrow(estimated.conditional.probs)) { + if(parents.pos[i, 1] != -1) { + if(estimated.marginal.probs[parents.pos[i, 1], 1] > 0) { + estimated.conditional.probs[i, 1] = estimated.joint.probs[parents.pos[i, 1], i] / estimated.marginal.probs[parents.pos[i, 1], 1] + } else { + estimated.conditional.probs[i, 1] = 0 + } + } else { + # if the node has no parent, its conditional probability is set to 1 + estimated.conditional.probs[i, 1] = 1 + } + } + + # structure to save the results + estimated.probs = list(marginal.probs=estimated.marginal.probs, + joint.probs=estimated.joint.probs, + conditional.probs=estimated.conditional.probs) + return(estimated.probs); + +} + +#' estimate the probability of observing each sample in the dataset given the reconstructed topology +#' @title estimate.tree.samples +#' @param dataset a valid dataset +#' @param reconstructed.topology the reconstructed topology +#' @param estimated.marginal.probabilities estimated marginal probabilities of the events +#' @param estimated.conditional.probabilities estimated conditional probabilities of the events +#' @param error.rates error rates for false positives and false negatives +#' @return probabilities: probability of each sample +estimate.tree.samples = function(dataset, + reconstructed.topology, + estimated.marginal.probabilities, + estimated.conditional.probabilities, + error.rates) { + + # structure where to save the probabilities of the samples + probabilities = array(-1, c(nrow(dataset), 1)) + # topological properties: + # 1. tree number + # 2. parent + # 3. level in the tree + topology.structure = array(0, c(nrow(reconstructed.topology), 3)) + + # go through the subtrees within the topology of four + tree.count = 0 + for (i in 1:nrow(reconstructed.topology)) { + # if node i has no parents, it is a root + if(length(which(reconstructed.topology[,i] == 1)) == 0) { + tree.count = tree.count + 1 + level = 1 + # set the parameters for the root + topology.structure[i,1] = tree.count + topology.structure[i,2] = -1 + topology.structure[i,3] = level + curr.node = i + # go through this tree + while (length(curr.node) > 0) { + # move to the next level + level = level + 1 + new.node = vector() + for (j in 1:length(curr.node)) { + curr.new.node = which(reconstructed.topology[curr.node[j], ] == 1) + if(length(curr.new.node) > 0) { + new.node = c(new.node,curr.new.node) + for (k in 1:length(curr.new.node)) { + # number of the current subtree + topology.structure[curr.new.node[k], 1] = tree.count + # parent of the current node + topology.structure[curr.new.node[k], 2] = curr.node[j] + # level of this node + topology.structure[curr.new.node[k], 3] = level + } + } + } + curr.node = new.node + } + } + } + + # go through the dataset and evalutate the probability of each sample + for (i in 1:nrow(dataset)) { + sample.probability = 1 + for (j in 1:tree.count) { + # probability of this subtree (without any knowledge, I set it to 1) + curr.sample.probability = 1 + # entries referring to this subtree + curr.entry = which(topology.structure[, 1] == j) + # samples of each element of this subtree + curr.sample = dataset[i,curr.entry] + # parents of each element of this subtree + curr.parents = topology.structure[curr.entry, 2] + # level of each element of this subtree + curr.levels = topology.structure[curr.entry, 3] + # set the probability as the one of the root of this tree + curr.sample.probability = curr.sample.probability * estimated.marginal.probabilities[curr.entry[which(curr.levels == 1, arr.ind=TRUE)], 1] + # set the maximum level of this subtree + max.level = curr.levels[which.max(curr.levels)] + # if I have at least one event in this sample + if(length(curr.sample[curr.sample == 1]) > 0) { + # visit the nodes starting from the lower level + is.valid = TRUE + for (k in max.level:1) { + curr.level.nodes = which(curr.levels == k, arr.ind=TRUE) + # if I'm not on a root + if(k > 1) { + curr.level.samples = curr.sample[curr.level.nodes] + # if I have at least one event at this level + if(length(curr.level.samples[curr.level.samples == 1]) > 0) { + # I can not have a child without its parent + curr.level.parent = curr.parents[curr.level.nodes] + for (p in 1:length(curr.level.parent)) { + if(dataset[i,curr.level.parent[p]] == 0 && dataset[i, curr.entry[curr.level.nodes[p]]] == 1) { + is.valid = FALSE + break + } + } + } + # if the sample is valid + if(is.valid == TRUE) { + # add the probability of each edge + curr.level.parent = curr.parents[curr.level.nodes] + for (p in 1:length(curr.level.parent)) { + if(dataset[i, curr.level.parent[p]] == 1 && dataset[i,curr.entry[curr.level.nodes[p]]] == 0) { + curr.sample.probability = curr.sample.probability * (1 - estimated.conditional.probabilities[curr.entry[curr.level.nodes[p]], 1]) + } + else if(dataset[i,curr.level.parent[p]]==1 && dataset[i, curr.entry[curr.level.nodes[p]]] == 1) { + curr.sample.probability = curr.sample.probability * estimated.conditional.probabilities[curr.entry[curr.level.nodes[p]], 1] + } + } + } + } + if(is.valid == FALSE) { + curr.sample.probability = 0 + break + } + } + if(is.valid == FALSE) { + sample.probability = 0 + break + } + } else { + # if this sample has no events for this tree + curr.sample.probability = 1 - curr.sample.probability + } + # update the probability of the topology with the one of this sample + sample.probability = sample.probability * curr.sample.probability + if(sample.probability == 0) { + break + } + } + probabilities[i, 1] = sample.probability; + } + + # correct the estimation by the error rates + errors.matrix <- array(0, c(nrow(probabilities), nrow(dataset))) + for (i in 1:nrow(probabilities)) { + for (j in 1:nrow(dataset)) { + curr.sample.x = as.numeric(dataset[i, ]) + curr.sample.y = as.numeric(dataset[j, ]) + errors.matrix[i, j] = (1 - error.rates$error.fp)^((1 - curr.sample.x) %*% (1 - curr.sample.y)) * error.rates$error.fp ^ ((1 - curr.sample.x) %*% curr.sample.y) * (1 - error.rates$error.fn)^(curr.sample.x %*% curr.sample.y) * error.rates$error.fn^(curr.sample.x %*% (1 - curr.sample.y)) + } + } + + probabilities[, 1] = as.numeric(as.vector(probabilities) %*% errors.matrix) + return(probabilities) + +} diff --git a/R/caprese.fit.R b/R/caprese.fit.R deleted file mode 100644 index c8e58ffe..00000000 --- a/R/caprese.fit.R +++ /dev/null @@ -1,72 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -#reconstruct the best tree-like topology -#INPUT: -#dataset: a dataset describing a progressive phenomenon -#lambda: shrinkage parameter (value in [0,1]) -#verbose: should I print the warnings? Yes if TRUE, no otherwise -#RETURN: -#topology: the reconstructed tree-like topology -"caprese.fit" <- -function(dataset,lambda,verbose) { - #structure to compute the observed marginal and joint probabilities - pair.count <- array(0, dim=c(ncol(dataset), ncol(dataset))); - #compute the probabilities on the dataset - for(i in 1:ncol(dataset)) { - for(j in 1:ncol(dataset)) { - val1 = dataset[ ,i]; - val2 = dataset[ ,j]; - pair.count[i,j] = (t(val1) %*% val2); - } - } - #marginal.probs is an array of the observed marginal probabilities - marginal.probs <- array(as.matrix(diag(pair.count)/nrow(dataset)),dim=c(ncol(dataset),1)); - #joint.probs is an array of the observed joint probabilities - joint.probs <- as.matrix(pair.count/nrow(dataset)); - #reconstruct the causal topology - best.parents = get.tree.parents(marginal.probs,joint.probs,lambda); - #create the structures where to save the results - parents.pos <- best.parents$parents; - conditional.probs <- array(-1, dim=c(length(parents.pos),1)); - adj.matrix <- array(0, dim=c(length(parents.pos),length(parents.pos))); - #set the parents names and the structures - for(i in 1:ncol(dataset)) { - #if the node has a parent - if(parents.pos[i,1]!=-1) { - #Note: [i,j] = 1 means that i is causing j - adj.matrix[parents.pos[i,1],i] = 1; - #compute the conditional probability of P(CHILD=1|PARENT=1) - conditional.probs[i,1] = best.parents$joint.probs[parents.pos[i,1],i]/best.parents$marginal.probs[parents.pos[i]]; - } - #if the node has no parent, its conditional probability is set to - else { - conditional.probs[i,1] = 1; - } - } - - #estimate the error rates and, given them, the probabilities - estimated.error.rates = estimate.tree.error.rates(best.parents$marginal.probs,best.parents$joint.probs,parents.pos); - estimated.probabilities = estimate.tree.probs(best.parents$marginal.probs,best.parents$joint.probs,parents.pos,estimated.error.rates); - #structures where to save the probabilities - probabilities = list(marginal.probs=best.parents$marginal.probs,joint.probs=best.parents$joint.probs,conditional.probs=conditional.probs,estimated.marginal.probs=estimated.probabilities$marginal.probs,estimated.joint.probs=estimated.probabilities$joint.probs,estimated.conditional.probs=estimated.probabilities$conditional.probs); - parameters = list(lambda=lambda); - #return the results - topology = list(dataset=dataset, probabilities=probabilities,error.rates=estimated.error.rates,pr.score=best.parents$pr.score,adj.matrix=adj.matrix,parameters=parameters); - return(topology); -} diff --git a/R/capri.algorithm.R b/R/capri.algorithm.R new file mode 100644 index 00000000..9072aca8 --- /dev/null +++ b/R/capri.algorithm.R @@ -0,0 +1,1175 @@ +################################################################################## +# # +# TRONCO: a tool for TRanslational ONCOlogy # +# # +################################################################################## +# Copyright (c) 2015, Marco Antoniotti, Giulio Caravagna, Luca De Sano, # +# Alex Graudenzi, Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, # +# Giancarlo Mauri, Bud Mishra and Daniele Ramazzotti. # +# # +# All rights reserved. This program and the accompanying materials # +# are made available under the terms of the GNU GPL v3.0 # +# which accompanies this distribution # +# # +################################################################################## + +#' reconstruct the best dag topology running CAPRI algorithm +#' @title capri.fit +#' @param dataset a dataset describing a progressive phenomenon +#' @param hypotheses hypotheses to be considered in the reconstruction +#' @param command type of search for the likelihood fit, either hill climbing (hc) or tabu (tabu) +#' @param regularization regularizators to be used for the likelihood fit +#' @param do.boot should I perform bootstrap? Yes if TRUE, no otherwise +#' @param nboot integer number (greater than 0) of bootstrap sampling to be performed +#' @param pvalue pvalue for the tests (value between 0 and 1) +#' @param min.boot minimum number of bootstrapping to be performed +#' @param min.stat should I keep bootstrapping untill I have nboot valid values? +#' @param boot.seed seed to be used for the sampling +#' @param do.estimation should I perform the estimation of the error rates and probabilities? +#' @param silent should I be verbose? +#' @return topology: the reconstructed tree topology +capri.fit = function(dataset, + hypotheses = NA, + command = "hc", + regularization = c("bic","aic"), + do.boot = TRUE, + nboot = 100, + pvalue = 0.05, + min.boot = 3, + min.stat = TRUE, + boot.seed = NULL, + do.estimation = FALSE, + silent = FALSE ) +{ + + # start the clock to measure the execution time + ptm <- proc.time(); + + # structure with the set of valid edges + # I start from the complete graph, i.e., I have no prior and all the connections are possibly causal + adj.matrix = array(1,c(ncol(dataset),ncol(dataset))); + colnames(adj.matrix) = colnames(dataset); + rownames(adj.matrix) = colnames(dataset); + + # the diagonal of the adjacency matrix should not be considered, i.e., no self cause is allowed + diag(adj.matrix) = 0; + + # consider any hypothesis + adj.matrix = hypothesis.adj.matrix(hypotheses,adj.matrix); + + # check if the dataset is valid + valid.dataset = check.dataset(dataset,adj.matrix,FALSE); + adj.matrix = valid.dataset$adj.matrix; + invalid.events = valid.dataset$invalid.events; + + # reconstruct the prima facie topology + # should I perform bootstrap? Yes if TRUE, no otherwise + if(do.boot==TRUE) { + if(!silent) cat('*** Bootstraping selective advantage scores (prima facie).\n') + prima.facie.parents = get.prima.facie.parents.do.boot(dataset,hypotheses,nboot,pvalue,adj.matrix,min.boot,min.stat,boot.seed,silent); + } + else { + if(!silent) cat('*** Computing selective advantage scores (prima facie).\n') + prima.facie.parents = get.prima.facie.parents.no.boot(dataset,hypotheses,adj.matrix,silent); + } + + # add back in any connection invalid for the probability raising theory + if(length(invalid.events)>0) { + for(i in 1:nrow(invalid.events)) { + prima.facie.parents$adj.matrix$adj.matrix.acyclic[invalid.events[i,"cause"],invalid.events[i,"effect"]] = 1; + prima.facie.parents$adj.matrix$adj.matrix.cyclic[invalid.events[i,"cause"],invalid.events[i,"effect"]] = 1; + } + } + adj.matrix.prima.facie = prima.facie.parents$adj.matrix$adj.matrix.cyclic + + # perform the likelihood fit with the required regularization scores + model = list(); + for (reg in regularization) { + + # perform the likelihood fit with the chosen regularization score on the prima facie topology + if(!silent) cat(paste0('*** Performing likelihood-fit with regularization ',reg,'.\n')) + best.parents = perform.likelihood.fit(dataset,prima.facie.parents$adj.matrix$adj.matrix.acyclic,command,regularization=reg); + + # set the structure to save the conditional probabilities of the reconstructed topology + parents.pos.fit = array(list(),c(ncol(dataset),1)); + conditional.probs.fit = array(list(),c(ncol(dataset),1)); + + # compute the conditional probabilities + for(i in 1:ncol(dataset)) { + for(j in 1:ncol(dataset)) { + if(i!=j && best.parents$adj.matrix$adj.matrix.fit[i,j]==1) { + parents.pos.fit[j,1] = list(c(unlist(parents.pos.fit[j,1]),i)); + conditional.probs.fit[j,1] = list(c(unlist(conditional.probs.fit[j,1]),prima.facie.parents$joint.probs[i,j]/prima.facie.parents$marginal.probs[i])); + } + } + } + parents.pos.fit[unlist(lapply(parents.pos.fit,is.null))] = list(-1); + conditional.probs.fit[unlist(lapply(conditional.probs.fit,is.null))] = list(1); + + # perform the estimation of the probabilities if requested + if(do.estimation) { + # estimate the error rates and, given them, the probabilities for the causal topology + estimated.error.rates.fit = estimate.dag.error.rates(dataset,prima.facie.parents$marginal.probs,prima.facie.parents$joint.probs,parents.pos.fit); + estimated.probabilities.fit = estimate.dag.probs(dataset,prima.facie.parents$marginal.probs,prima.facie.parents$joint.probs,parents.pos.fit,estimated.error.rates.fit); + } + else { + estimated.error.rates.fit = list(error.fp=NA,error.fn=NA); + estimated.probabilities.fit = list(marginal.probs=NA,joint.probs=NA,conditional.probs=NA); + } + + # set results for the current regolarizator + probabilities.observed = list(marginal.probs=prima.facie.parents$marginal.probs,joint.probs=prima.facie.parents$joint.probs,conditional.probs=conditional.probs.fit); + probabilities.fit = list(estimated.marginal.probs=estimated.probabilities.fit$marginal.probs,estimated.joint.probs=estimated.probabilities.fit$joint.probs,estimated.conditional.probs=estimated.probabilities.fit$conditional.probs); + probabilities = list(probabilities.observed=probabilities.observed,probabilities.fit=probabilities.fit); + parents.pos = parents.pos.fit; + error.rates = estimated.error.rates.fit; + + # save the results for the model + model[[reg]] = list(probabilities=probabilities,parents.pos=parents.pos,error.rates=error.rates,adj.matrix=best.parents$adj.matrix); + + } + + # set the execution parameters + parameters = list(algorithm="CAPRI",command=command,regularization=regularization,do.boot=do.boot,nboot=nboot,pvalue=pvalue,min.boot=min.boot,min.stat=min.stat,boot.seed=boot.seed,do.estimation=do.estimation,silent=silent); + + # return the results + topology = list(dataset=dataset,hypotheses=hypotheses,adj.matrix.prima.facie=adj.matrix.prima.facie,confidence=prima.facie.parents$pf.confidence,model=model,parameters=parameters,execution.time=(proc.time()-ptm)); + return(topology); + +} + +#' check if the dataset is valid accordingly to the probability raising +#' @title check.dataset +#' @param dataset a dataset describing a progressive phenomenon +#' @param adj.matrix adjacency matrix of the topology +#' @param verbose should I print the warnings? Yes if TRUE, no otherwise +#' @return valid.dataset: a dataset valid accordingly to the probability raising +check.dataset = function( dataset, adj.matrix, verbose ) { + + # perform the preprocessing only if I have at least two binary events and two samples + if(length(ncol(dataset))>0 && ncol(dataset)>1 && length(nrow(dataset))>0 && nrow(dataset)>1 && length(dataset[dataset==0|dataset==1])==nrow(dataset)*ncol(dataset)) { + + # structure to compute the observed and observed joint probabilities + pair.count <- array(0, dim=c(ncol(dataset), ncol(dataset))); + # compute the probabilities on the dataset + for(i in 1:ncol(dataset)) { + for(j in 1:ncol(dataset)) { + val1 = dataset[ ,i]; + val2 = dataset[ ,j]; + pair.count[i,j] = (t(val1) %*% val2); + } + } + # marginal.probs is an array of the observed marginal probabilities + marginal.probs <- array(as.matrix(diag(pair.count)/nrow(dataset)),dim=c(ncol(dataset),1)); + # joint.probs is an array of the observed joint probabilities + joint.probs <- as.matrix(pair.count/nrow(dataset)); + + # evaluate the connections + invalid.events = vector(); + for (i in 1:ncol(adj.matrix)) { + for (j in 1:nrow(adj.matrix)) { + # if i --> j is valid + if(i!=j && adj.matrix[i,j]==1) { + # the potential cause is always present + if(marginal.probs[i]==1) { + # the potential child is not always missing + if(marginal.probs[i]>0) { + adj.matrix[i,j] = 0; + # invalid.events = rbind(invalid.events,t(c(i,j))); + } + # the potential child is always missing + else if(marginal.probs[i]==0) { + adj.matrix[i,j] = 0; + } + } + # the potential cause is always missing + else if(marginal.probs[i]==0) { + adj.matrix[i,j] = 0; + } + # the potential child is always present + else if(marginal.probs[j]==1) { + adj.matrix[i,j] = 0; + } + # the potential child is always missing + else if(marginal.probs[j]==0) { + adj.matrix[i,j] = 0; + } + # the two events are equals + else if((joint.probs[i,j]/marginal.probs[i])==1 && (joint.probs[i,j]/marginal.probs[j])==1) { + adj.matrix[i,j] = 0; + invalid.events = rbind(invalid.events,t(c(i,j))); + } + } + } + } + if(length(invalid.events)>0) { + colnames(invalid.events) = c("cause","effect"); + } + valid.dataset = list(dataset=dataset,adj.matrix=adj.matrix,invalid.events=invalid.events,marginal.probs=marginal.probs,joint.probs=joint.probs); + + } + + #if the dataset is not valid, we stop here + else { + if(verbose==TRUE) { + warning("The dataset must contain at least two binary events and two samples."); + } + valid.dataset = list(dataset=NA,adj.matrix=NA,invalid.events=NA,marginal.probs=NA,joint.probs=NA); + } + + return(valid.dataset); + +} + + +#' compute a robust estimation of the scores using rejection sampling bootstrap +#' @title get.bootstapped.scores +#' @param dataset a valid dataset +#' @param nboot number of bootstrap resampling to be performed +#' @param adj.matrix adjacency matrix of the initially valid edges +#' @param min.boot minimum number of bootstrapping to be performed +#' @param min.stat should I keep bootstrapping untill I have nboot valid values? +#' @param boot.seed seed to be used for the sampling +#' @param silent Should I be verbose? +#' @return scores: list structure with the scores and the data generated by bootstrap +get.bootstapped.scores = function(dataset, + nboot, + adj.matrix, + min.boot = 3, + min.stat = TRUE, + boot.seed = NULL, + silent) + +{ + + # horrible fix... + if(!exists('hide.progress.bar')) { + hide.progress.bar = TRUE + } + + # structures to save the distributions generated by the bootstrapped datasets + marginal.probs.distributions <- array(list(-1), c(ncol(dataset),1)); + joint.probs.distributions <- array(list(-1), c(ncol(dataset),ncol(dataset))); + prima.facie.model.distributions <- array(list(-1), c(ncol(dataset),ncol(dataset))); + prima.facie.null.distributions <- array(list(-1), c(ncol(dataset),ncol(dataset))); + # structures to save the number of performed valid (not rejected) sampling + sampled.marginal.probs.distributions <- array(0, dim=c(ncol(dataset),1)); + sampled.joint.probs.distributions <- array(0, dim=c(ncol(dataset),ncol(dataset))); + sampled.prima.facie.distributions <- array(0, dim=c(ncol(dataset),ncol(dataset))); + + # I require a minimum of min.boot (default = 3) sampling of bootstrap + nboot = max(nboot,min.boot); + + # set not to sample for the invalid edges + for(i in 1:nrow(adj.matrix)) { + for(j in 1:ncol(adj.matrix)) { + if(adj.matrix[i,j]==0) { + sampled.prima.facie.distributions[i,j] = nboot; + } + } + } + + # perform bootstrap estimation based on a number of bootstrapped (>= nboot) datasets + curr.iteration = min(sampled.prima.facie.distributions); + boot.counter = 0; + + # set the seed to be used for the sampling + set.seed(boot.seed); + + if(silent==FALSE && !hide.progress.bar) { + # create a progress bar + flush.console(); + pb <- txtProgressBar(curr.iteration, nboot, style = 3); + } + + while(curr.iteration j + prima.facie.model.distributions[i,j] = list(curr.prima.facie.model[i,j]); + prima.facie.null.distributions[i,j] = list(curr.prima.facie.null[i,j]); + } + else { + # scores for i --> j + prima.facie.model.distributions[i,j] = list(c(unlist(prima.facie.model.distributions[i,j]),curr.prima.facie.model[i,j])); + prima.facie.null.distributions[i,j] = list(c(unlist(prima.facie.null.distributions[i,j]),curr.prima.facie.null[i,j])); + } + } + + } + + } + + # set the number of performed iterations after the last bootstrap sampling + curr.iteration = min(sampled.prima.facie.distributions); + + # if the flag min.stat is FALSE, + # even if after nboot iterations I don't have nboot valid entries, + # as soon as I have at least min.boot values, I stop anyway + if(min.stat==FALSE && boot.counter>=nboot && curr.iteration>=min.boot) { + curr.iteration = nboot; + } + + if(silent==FALSE && !hide.progress.bar) { + #increment the progress bar + if(min.stat==FALSE) { + setTxtProgressBar(pb, boot.counter); + } + else { + setTxtProgressBar(pb, curr.iteration); + } + } + + } + + if(silent==FALSE && !hide.progress.bar) { + # close the progress bar + close(pb); + } + + # save the results and return them + scores <- list(marginal.probs.distributions=marginal.probs.distributions,joint.probs.distributions=joint.probs.distributions,prima.facie.model.distributions=prima.facie.model.distributions,prima.facie.null.distributions=prima.facie.null.distributions); + return(scores); + +} + +#' compute the observed probabilities and the prima facie scores on the dataset +#' @title get.dag.scores +#' @param dataset a valid dataset +#' @param adj.matrix adjacency matrix of the initially valid edges +#' @return scores: observed probabilities and prima facie scores +get.dag.scores = function( dataset, adj.matrix ) { + + # structure to save the prima facie scores + prima.facie.model <- array(-1, dim=c(ncol(dataset), ncol(dataset))); + prima.facie.null <- array(-1, dim=c(ncol(dataset), ncol(dataset))); + # structure to save the observed and observed-joint probabilities + pair.count <- array(0, dim=c(ncol(dataset), ncol(dataset))); + + # compute the observed probabilities on the dataset + for(i in 1:ncol(dataset)) { + for(j in 1:ncol(dataset)) { + val1 = dataset[ ,i]; + val2 = dataset[ ,j]; + pair.count[i,j] = (t(val1) %*% val2); + } + } + # marginal.probs is an array with the marginal probabilities + marginal.probs <- array(as.matrix(diag(pair.count)/nrow(dataset)),dim=c(ncol(dataset),1)); + # joint.probs is an array with the joint observed probabilities + joint.probs <- as.matrix(pair.count/nrow(dataset)); + + # compute the prima facie scores based on the probability raising model + for(i in 1:nrow(prima.facie.model)) { + for(j in 1:ncol(prima.facie.model)) { + # the scores are saved in the convention of the adjacency matrix, i.e., [i,j] means i is causing j + # the diagonal (self cause) and the other invalid edges have not to be considered + if(adj.matrix[i,j]!=0) { + # check if the connections from j to i and from i to j can be evaluated on this dataset + if(marginal.probs[i]>0 && marginal.probs[i]<1 && marginal.probs[j]>0 && marginal.probs[j]<1) { + # check if the two events i and j are distinguishable + if((joint.probs[i,j]/marginal.probs[j])<1 || (joint.probs[i,j]/marginal.probs[i])<1) { + # prima facie scores of i --> j + prima.facie.model[i,j] = joint.probs[j,i]/marginal.probs[i]; + prima.facie.null[i,j] = (marginal.probs[j]-joint.probs[j,i])/(1-marginal.probs[i]); + } + } + } + } + } + + # save the results and return them + scores <- list(marginal.probs=marginal.probs,joint.probs=joint.probs,prima.facie.model=prima.facie.model,prima.facie.null=prima.facie.null); + return(scores); + +} + +#' select the best set of prima facie causes per node +#' @title get.prima.facie.causes.do.boot +#' @param adj.matrix adjacency matrix of the initially valid edges +#' @param hypotheses hypotheses to be considered +#' @param marginal.probs.distributions distributions of the bootstrapped marginal probabilities +#' @param prima.facie.model.distributions distributions of the prima facie model +#' @param prima.facie.null.distributions distributions of the prima facie null +#' @param pvalue minimum pvalue for the Mann-Whitney U tests to be significant +#' @param dataset a valid dataset +#' @param marginal.probs observed marginal probabilities +#' @param joint.probs observed joint probabilities +#' @param silent Should I be verbose? +#' @return prima.facie.topology: list describing the topology of the prima facie causes +get.prima.facie.causes.do.boot = function(adj.matrix, + hypotheses, + marginal.probs.distributions, + prima.facie.model.distributions, + prima.facie.null.distributions, + pvalue, + dataset, + marginal.probs, + joint.probs, + silent = FALSE ) + +{ + + # structure to save the confidence of the edges + edge.confidence.matrix <- array(list(), c(3,1)); + edge.confidence.matrix[[1,1]] = array(NA, c(ncol(prima.facie.model.distributions),ncol(prima.facie.model.distributions))); + edge.confidence.matrix[[2,1]] = array(NA, c(ncol(prima.facie.model.distributions),ncol(prima.facie.model.distributions))); + edge.confidence.matrix[[3,1]] = array(NA, c(ncol(prima.facie.model.distributions),ncol(prima.facie.model.distributions))); + + # verify Suppes' conditions for prima facie causes + # i.e., i --> j implies P(i)>P(j) (temporal priority) and P(j|i)>P(j|not i) (probability raising) + # verify the temporal priority condition + if(!silent) cat(paste0('\tEvaluating \"temporal priority\" (Wilcoxon, p-value ', pvalue, ')\n')); + temporal.priority = verify.temporal.priority.do.boot(marginal.probs.distributions,pvalue,adj.matrix,edge.confidence.matrix); + + # verify the probability raising condition + if(!silent) cat(paste0('\tEvaluating \"probability raising\" (Wilcoxon, p-value ', pvalue, ')\n')); + probability.raising = verify.probability.raising.do.boot(prima.facie.model.distributions,prima.facie.null.distributions,pvalue,temporal.priority$adj.matrix,temporal.priority$edge.confidence.matrix); + + # perform the hypergeometric test for each pair of events + for(i in 1:ncol(adj.matrix)) { + for(j in i:nrow(adj.matrix)) { + + # the diagonal (self cause) and the other invalid edges have not to be considered + if(adj.matrix[i,j]!=0 || adj.matrix[j,i]!=0) { + #compute the confidence by hypergeometric test for both j --> i and i --> j + probability.raising$edge.confidence.matrix[[3,1]][i,j] = phyper(joint.probs[i,j]*nrow(dataset),marginal.probs[i]*nrow(dataset),nrow(dataset)-marginal.probs[i]*nrow(dataset),marginal.probs[j]*nrow(dataset),lower.tail=FALSE); + probability.raising$edge.confidence.matrix[[3,1]][j,i] = probability.raising$edge.confidence.matrix[[3,1]][i,j]; + } + else { + probability.raising$edge.confidence.matrix[[3,1]][i,j] = 1; + probability.raising$edge.confidence.matrix[[3,1]][j,i] = 1; + } + + } + } + + # remove any cycle + adj.matrix.cyclic = probability.raising$adj.matrix + if(length(temporal.priority$not.ordered)>0 || !is.na(hypotheses[1])) { + + if(!silent) cat('*** Loop detection found loops to break.\n') + + weights.temporal.priority = probability.raising$edge.confidence.matrix[[1,1]]+probability.raising$edge.confidence.matrix[[2,1]]; + weights.matrix = probability.raising$edge.confidence.matrix[[2,1]]+probability.raising$edge.confidence.matrix[[3,1]]; + acyclic.topology = remove.cycles(probability.raising$adj.matrix,weights.temporal.priority,weights.matrix,temporal.priority$not.ordered,hypotheses,silent); + adj.matrix.acyclic = acyclic.topology$adj.matrix; + + } + else { + adj.matrix.acyclic = probability.raising$adj.matrix; + } + adj.matrix = list(adj.matrix.cyclic=adj.matrix.cyclic,adj.matrix.acyclic=adj.matrix.acyclic) + + # save the results and return them + prima.facie.topology <- list(adj.matrix=adj.matrix,edge.confidence.matrix=probability.raising$edge.confidence.matrix); + return(prima.facie.topology); + +} + +#' select the best set of prima facie causes per node without bootstrap +#' @title get.prima.facie.causes.no.boot +#' @param adj.matrix adjacency matrix of the initially valid edges +#' @param hypotheses hypotheses object related to adjacency matrix +#' @param marginal.probs observed marginal probabilities +#' @param prima.facie.model prima facie model +#' @param prima.facie.null prima facie null +#' @param dataset a valid dataset +#' @param joint.probs observed joint probabilities +#' @param silent Should I be verbose? +#' @return prima.facie.topology: adjacency matrix of the prima facie causes +get.prima.facie.causes.no.boot = function(adj.matrix, + hypotheses, + marginal.probs, + prima.facie.model, + prima.facie.null, + dataset, + joint.probs, + silent = FALSE ) + +{ + + # structure to save the confidence of the edges + edge.confidence.matrix <- array(list(), c(3,1)); + edge.confidence.matrix[[1,1]] = array(NA, c(ncol(adj.matrix),ncol(adj.matrix))); + edge.confidence.matrix[[2,1]] = array(NA, c(ncol(adj.matrix),ncol(adj.matrix))); + edge.confidence.matrix[[3,1]] = array(NA, c(ncol(adj.matrix),ncol(adj.matrix))); + + # verify Suppes' conditions for prima facie causes + # i.e., i --> j implies P(i)>P(j) (temporal priority) and P(j|i)>P(j|not i) (probability raising) + # verify the temporal priority condition + if(!silent) cat(paste0('\tEvaluating \"temporal priority\".\n')); + temporal.priority = verify.temporal.priority.no.boot(marginal.probs,adj.matrix,edge.confidence.matrix); + + # verify the probability raising condition + if(!silent) cat(paste0('\tEvaluating \"probability raising\".\n')); + probability.raising = verify.probability.raising.no.boot(prima.facie.model,prima.facie.null,temporal.priority$adj.matrix,temporal.priority$edge.confidence.matrix); + + # perform the hypergeometric test for each pair of events + for(i in 1:ncol(adj.matrix)) { + for(j in i:nrow(adj.matrix)) { + + # the diagonal (self cause) and the other invalid edges have not to be considered + if(adj.matrix[i,j]!=0 || adj.matrix[j,i]!=0) { + #compute the confidence by hypergeometric test for both j --> i and i --> j + probability.raising$edge.confidence.matrix[[3,1]][i,j] = phyper(joint.probs[i,j]*nrow(dataset),marginal.probs[i]*nrow(dataset),nrow(dataset)-marginal.probs[i]*nrow(dataset),marginal.probs[j]*nrow(dataset),lower.tail=FALSE); + probability.raising$edge.confidence.matrix[[3,1]][j,i] = probability.raising$edge.confidence.matrix[[3,1]][i,j]; + } + else { + probability.raising$edge.confidence.matrix[[3,1]][i,j] = 1; + probability.raising$edge.confidence.matrix[[3,1]][j,i] = 1; + } + + } + } + + # remove any cycle + adj.matrix.cyclic = probability.raising$adj.matrix + if(length(temporal.priority$not.ordered)>0 || !is.na(hypotheses[1])) { + + if(!silent) cat('*** Loop detection found loops to break.\n') + + weights.temporal.priority = probability.raising$edge.confidence.matrix[[2,1]]; + weights.matrix = probability.raising$edge.confidence.matrix[[2,1]] + probability.raising$edge.confidence.matrix[[3,1]]; + acyclic.topology = remove.cycles(probability.raising$adj.matrix,weights.temporal.priority,weights.matrix,temporal.priority$not.ordered,hypotheses,silent); + adj.matrix.acyclic = acyclic.topology$adj.matrix; + + } + else { + adj.matrix.acyclic = probability.raising$adj.matrix; + } + adj.matrix = list(adj.matrix.cyclic=adj.matrix.cyclic,adj.matrix.acyclic=adj.matrix.acyclic) + + # save the results and return them + prima.facie.topology <- list(adj.matrix=adj.matrix,edge.confidence.matrix=probability.raising$edge.confidence.matrix); + return(prima.facie.topology); + +} + +#' select the set of the prima facie parents (with bootstrap) for each node +#' based on Suppes' definition of causation +#' @title get.prima.facie.parents.do.boot +#' @param dataset a valid dataset +#' @param hypotheses hypotheses object related to dataset +#' @param nboot integer number (greater than 0) of bootstrap sampling to be performed +#' @param pvalue pvalue for the tests (value between 0 and 1) +#' @param adj.matrix adjacency matrix of the initially valid edges +#' @param min.boot minimum number of bootstrapping to be performed +#' @param min.stat should I keep bootstrapping untill I have nboot valid values? +#' @param boot.seed seed to be used for the sampling +#' @param silent Should I be verbose? +#' @return prima.facie.parents list of the set (if any) of prima facie parents for each node +get.prima.facie.parents.do.boot = function(dataset, + hypotheses, + nboot, + pvalue, + adj.matrix, + min.boot, + min.stat, + boot.seed, + silent ) +{ + + # perform a robust estimation of the scores using rejection sampling bootstrap + scores = get.bootstapped.scores(dataset,nboot,adj.matrix,min.boot,min.stat,boot.seed,silent); + + # compute the observed and joint probabilities as the mean of the bootstrapped values + marginal.probs = array(-1,dim=c(ncol(dataset),1)); + joint.probs = array(-1,dim=c(ncol(dataset),ncol(dataset))); + for(i in 1:ncol(dataset)) { + marginal.probs[i,1] = mean(unlist(scores$marginal.probs.distributions[i,1])); + for(j in i:ncol(dataset)) { + joint.probs[i,j] = mean(unlist(scores$joint.probs.distributions[i,j])); + if(i!=j) { + joint.probs[j,i] = joint.probs[i,j]; + } + } + } + + # remove all the edges not representing a prima facie cause + prima.facie.topology = get.prima.facie.causes.do.boot(adj.matrix,hypotheses,scores$marginal.probs.distributions,scores$prima.facie.model.distributions,scores$prima.facie.null.distributions,pvalue,dataset,marginal.probs,joint.probs,silent); + + # save the results and return them + prima.facie.parents <- list(marginal.probs=marginal.probs,joint.probs=joint.probs,adj.matrix=prima.facie.topology$adj.matrix,pf.confidence=prima.facie.topology$edge.confidence.matrix); + return(prima.facie.parents); + +} + + +#' select the set of the prima facie parents (without bootstrap) for each node based on Suppes' definition of causation +#' @title get.prima.facie.parents.no.boot +#' @param dataset a valid dataset +#' @param hypotheses hypotheses object associated to dataset +#' @param adj.matrix adjacency matrix of the initially valid edges +#' @param silent Should I be verbose? +#' @return prima.facie.parents: list of the set (if any) of prima facie parents for each node +get.prima.facie.parents.no.boot = function( dataset, hypotheses, adj.matrix, silent ) { + + # compute the scores from the dataset + scores = get.dag.scores(dataset,adj.matrix); + + # remove all the edges not representing a prima facie causes + prima.facie.topology = get.prima.facie.causes.no.boot(adj.matrix,hypotheses,scores$marginal.probs,scores$prima.facie.model,scores$prima.facie.null,dataset,scores $joint.probs,silent); + + # save the results return them + prima.facie.parents <- list(marginal.probs=scores$marginal.probs,joint.probs=scores$joint.probs,adj.matrix=prima.facie.topology$adj.matrix,pf.confidence=prima.facie.topology$edge.confidence.matrix); + return(prima.facie.parents); + +} + +#' reconstruct the best causal topology by likelihood fit +#' @title perform.likelihood.fit +#' @param dataset a valid dataset +#' @param adj.matrix the adjacency matrix of the prima facie causes +#' @param command type of search, either hill climbing (hc) or tabu (tabu) +#' @param regularization regularization term to be used in the likelihood fit +#' @return topology: the adjacency matrix of both the prima facie and causal topologies +perform.likelihood.fit = function( dataset, adj.matrix, command, regularization ) { + + # each variable should at least have 2 values: I'm ignoring connection to invalid events + # but, still, need to make the dataset valid for bnlearn + for (i in 1:ncol(dataset)) { + if(sum(dataset[,i])==0) { + dataset[1,i] = 1; + } + else if(sum(dataset[,i])==nrow(dataset)) { + dataset[1,i] = 0; + } + } + + # load the bnlearn library required for the likelihood fit with regularizator + + + # adjacency matrix of the topology reconstructed by likelihood fit + adj.matrix.fit = array(0,c(nrow(adj.matrix),ncol(adj.matrix))); + + # create a categorical data frame from the dataset + data = array("missing",c(nrow(dataset),ncol(dataset))); + for (i in 1:nrow(dataset)) { + for (j in 1:ncol(dataset)) { + if(dataset[i,j]==1) { + data[i,j] = "observed"; + } + } + } + data = as.data.frame(data); + my.names = names(data); + for (i in 1:length(my.names)) { + my.names[i] = toString(i); + } + names(data) = my.names; + + # create the blacklist based on the prima facie topology + cont = 0; + parent = -1; + child = -1; + for (i in 1:nrow(adj.matrix)) { + for (j in 1:ncol(adj.matrix)) { + if(i!=j) { + if(adj.matrix[i,j]==0) { + # [i,j] refers to causation i --> j + cont = cont + 1; + if(cont==1) { + parent = toString(i); + child = toString(j); + } + else { + parent = c(parent,toString(i)); + child = c(child,toString(j)); + } + } + } + } + } + + # perform the reconstruction by likelihood fit with regularization + # either the hill climbing or the tabu search is used as the mathematical optimization technique + + # cat('Performing likelihood-fit with regularization:', regularization, '(bnlearn)\n'); + # cat('Heuristic search method:', command, '(bnlearn)\n'); + + if(cont>0) { + blacklist = data.frame(from = parent,to = child); + if(command=="hc") { + my.net = hc(data,score= regularization,blacklist=blacklist); + } + else if(command=="tabu") { + my.net = tabu(data,score= regularization,blacklist=blacklist); + } + } + else { + if(command=="hc") { + my.net = hc(data,score= regularization); + } + else if(command=="tabu") { + my.net = tabu(data,score= regularization); + } + } + my.arcs = my.net$arcs; + + # build the adjacency matrix of the reconstructed topology + if(length(nrow(my.arcs))>0 && nrow(my.arcs)>0) { + for (i in 1:nrow(my.arcs)) { + # [i,j] refers to causation i --> j + adj.matrix.fit[as.numeric(my.arcs[i,1]),as.numeric(my.arcs[i,2])] = 1; + } + } + + # save the results and return them + adj.matrix = list(adj.matrix.pf=adj.matrix,adj.matrix.fit=adj.matrix.fit); + topology = list(adj.matrix=adj.matrix); + return(topology); + +} + +#' remove any cycle from a given cyclic topology +#' @title remove.cycles +#' @param adj.matrix adjacency matrix of the topology +#' @param weights.temporal.priority weighted matrix to be used to remove the cycles involving atomic events +#' @param weights.matrix weighted matrix to be used to remove the cycles involving hypotheses +#' @param not.ordered list of the nodes to be orderd +#' @param hypotheses hypotheses to evaluate potential cycles +#' @param silent Should I be verbose? +#' @return acyclic.topology: structure representing the best acyclic topology +remove.cycles = function(adj.matrix, + weights.temporal.priority, + weights.matrix, + not.ordered, + hypotheses = NA, + silent ) + +{ + + + total.edges = length(which(adj.matrix == 1)) + removed = 0 + + # evaluate the possible cycles involving atomic events + if(length(not.ordered)>0) { + + # consider only the edges that were not ordered by temporal priority + curr.edge.pos = 0; + for(i in 1:length(not.ordered)) { + + # consider the events i and j + curr.edge = not.ordered[[i]]; + curr.edge.i = curr.edge[1,1]; + curr.edge.j = curr.edge[2,1]; + + # check if i and j still create a cycle + if(adj.matrix[curr.edge.i,curr.edge.j]==1 && adj.matrix[curr.edge.j,curr.edge.i]==1) { + + # get the scores of the two edges + curr.score.i.j = weights.temporal.priority[curr.edge.i,curr.edge.j]; + curr.score.j.i = weights.temporal.priority[curr.edge.j,curr.edge.i]; + + # choose an edge based on the score + if(curr.score.i.j j is more confident (lower score) then j --> i + removed = removed + 1 + # cat("Removing edge ",colnames(adj.matrix)[curr.edge.j]," to ",colnames(adj.matrix)[curr.edge.i],"\n"); + adj.matrix[curr.edge.j,curr.edge.i] = 0; + } + else { + # otherwise + removed = removed + 1 + # cat("Removing edge ",colnames(adj.matrix)[curr.edge.i]," to ",colnames(adj.matrix)[curr.edge.j],"\n"); + adj.matrix[curr.edge.i,curr.edge.j] = 0; + } + } + + } + + } + + # create the structures where to save the weights in increasing order of confidence + ordered.weights <- vector(); + ordered.edges <- list(); + + # consider the patterns related the hypotheses + if(!is.na(hypotheses[1])) { + + # if I have hypotheses, add the edges to be evaluated during the loop removal + curr.edge.pos = 0; + for(i in 1:nrow(adj.matrix)) { + for(j in 1:nrow(adj.matrix)) { + if(adj.matrix[i,j]==1) { + ordered.weights = rbind(ordered.weights,weights.matrix[i,j]); + curr.edge.pos = curr.edge.pos + 1; + new.edge <- array(0, c(2,1)); + new.edge[1,1] = i; + new.edge[2,1] = j; + ordered.edges[curr.edge.pos] = list(new.edge); + } + } + } + + # sort the edges in increasing order of confidence (i.e. the edges with lower pvalue are the most confident) + ordered.edges = ordered.edges[sort(unlist(ordered.weights),decreasing=TRUE,index.return=TRUE)$ix]; + + } + + # visit the ordered edges and remove the ones that are causing any cycle + if(length(ordered.edges)>0) { + + # expanded matrix to be considered in removing the loops + expansion = hypotheses.expansion(input_matrix=adj.matrix,map=hypotheses$hstructure,hidden_and=F,expand=T,skip.disconnected=F); + + + for(i in 1:length(ordered.edges)) { + + # consider the edge i-->j + curr.edge = ordered.edges[[i]]; + curr.edge.i = curr.edge[1,1]; + curr.edge.j = curr.edge[2,1]; + + # resolve the mapping from the adj.matrix to the expanded one both for curr.edge.i and curr.edge.j + if(colnames(adj.matrix)[curr.edge.i]%in%expansion[[2]]) { + curr.edge.i.exp = which(colnames(expansion[[1]])%in%names(expansion[[2]])[which(expansion[[2]]%in%colnames(adj.matrix)[curr.edge.i])]); + } else { + curr.edge.i.exp = which(colnames(expansion[[1]])%in%colnames(adj.matrix)[curr.edge.i]); + } + if(colnames(adj.matrix)[curr.edge.j]%in%expansion[[2]]) { + curr.edge.j.exp = which(colnames(expansion[[1]])%in%names(expansion[[2]])[which(expansion[[2]]%in%colnames(adj.matrix)[curr.edge.j])]); + } else { + curr.edge.j.exp = which(colnames(expansion[[1]])%in%colnames(adj.matrix)[curr.edge.j]); + } + + # search for loops between curr.edge.i and curr.edge.j + curr.graph = graph.adjacency(expansion[[1]], mode="directed") + + is.path = length(unlist(get.shortest.paths(curr.graph, curr.edge.j.exp, curr.edge.i.exp)$vpath)); + + + # if there is a path between the two nodes, remove edge i --> j + if(is.path>0) { + removed = removed + 1 + # cat("Removing edge ",colnames(adj.matrix)[curr.edge.i]," to ",colnames(adj.matrix)[curr.edge.j],"\n"); + expansion[[1]][curr.edge.i.exp,curr.edge.j.exp] = 0; + adj.matrix[curr.edge.i,curr.edge.j] = 0; + } + + } + + if(!silent) cat(paste0('\tRemoved ', removed, ' edges out of ', total.edges ,' (', round(100 * removed/total.edges, 0),'%)\n')) + } + + # save the results and return them + acyclic.topology = list(adj.matrix=adj.matrix); + return(acyclic.topology); + +} + +#' verify the probability raising condition +#' @title verify.probability.raising.do.boot +#' @param prima.facie.model.distributions distributions of the prima facie model +#' @param prima.facie.null.distributions distributions of the prima facie null +#' @param pvalue minimum pvalue for the Mann-Whitney U tests to be significant +#' @param adj.matrix adjacency matrix of the topology +#' @param edge.confidence.matrix matrix of the confidence of each edge +#' @return probability.raising: list describing the causes where probability raising is verified +verify.probability.raising.do.boot = function(prima.facie.model.distributions, + prima.facie.null.distributions, + pvalue, + adj.matrix, + edge.confidence.matrix ) + +{ + + # evaluate the probability raising condition + for(i in 1:nrow(adj.matrix)) { + for(j in i:ncol(adj.matrix)) { + + # the diagonal (self cause) and the other invalid edges have not to be considered + if(adj.matrix[i,j]!=0 || adj.matrix[j,i]!=0) { + + # pvalue for the probability raising condition for i --> j + second.pvalue.i.j = wilcox.test(unlist(prima.facie.model.distributions[i,j]),unlist(prima.facie.null.distributions[i,j]),alternative="greater",mu=0)$p.value; + if(is.na(second.pvalue.i.j) || is.nan(second.pvalue.i.j)) { + # in this case the two distributions are exactly identical + second.pvalue.i.j = 1; + } + + # in this case i --> j is not valid + if(second.pvalue.i.j>=pvalue) { + adj.matrix[i,j] = 0; + } + + # pvalue for the probability raising condition for j --> i + second.pvalue.j.i = wilcox.test(unlist(prima.facie.model.distributions[j,i]),unlist(prima.facie.null.distributions[j,i]),alternative="greater",mu=0)$p.value; + if(is.na(second.pvalue.j.i) || is.nan(second.pvalue.j.i)) { + # in this case the two distributions are exactly identical + second.pvalue.j.i = 1; + } + + # in this case j --> i is not valid + if(second.pvalue.j.i>=pvalue) { + adj.matrix[j,i] = 0; + } + + # save the confidence for i-->j and j --> i + tmp = edge.confidence.matrix[[2,1]]; + tmp[i,j] = second.pvalue.i.j; + tmp[j,i] = second.pvalue.j.i; + edge.confidence.matrix[2,1] = list(tmp); + + } + else { + tmp = edge.confidence.matrix[[2,1]]; + tmp[i,j] = 1; + tmp[j,i] = 1; + edge.confidence.matrix[2,1] = list(tmp); + } + + } + + } + + # save the results and return them + probability.raising <- list(adj.matrix=adj.matrix,edge.confidence.matrix=edge.confidence.matrix); + return(probability.raising); + +} + + +#' verify the probability raising condition without bootstrap +#' @title verify.probability.raising.no.boot +#' @param prima.facie.model prima facie model +#' @param prima.facie.null prima facie null +#' @param adj.matrix adjacency matrix of the topology +#' @param edge.confidence.matrix matrix of the confidence of each edge +#' @return probability.raising: adjacency matrix where temporal priority is verified +verify.probability.raising.no.boot = function(prima.facie.model, + prima.facie.null, + adj.matrix, + edge.confidence.matrix ) + +{ + + # evaluate the probability raising condition + for(i in 1:nrow(adj.matrix)) { + for(j in i:ncol(adj.matrix)) { + + # the diagonal (self cause) and the other invalid edges have not to be considered + # probability raising condition: if P(j|i)>P(j|not i) the edge i --> j is valid for probability raising + if(adj.matrix[i,j]!=0 || adj.matrix[j,i]!=0) { + + # in this case i --> j is not valid + if(prima.facie.model[i,j]<=prima.facie.null[i,j]) { + adj.matrix[i,j] = 0; + } + + # in this case j --> i is not valid + if(prima.facie.model[j,i]<=prima.facie.null[j,i]) { + adj.matrix[j,i] = 0; + } + + # save the confidence for i-->j and j --> i + tmp = edge.confidence.matrix[[2,1]]; + tmp[i,j] = min(prima.facie.null[i,j]/prima.facie.model[i,j],1); + tmp[j,i] = min(prima.facie.null[j,i]/prima.facie.model[j,i],1); + edge.confidence.matrix[2,1] = list(tmp); + + } + else { + tmp = edge.confidence.matrix[[2,1]]; + tmp[i,j] = 1; + tmp[j,i] = 1; + edge.confidence.matrix[2,1] = list(tmp); + } + + } + } + + # save the results and return them + probability.raising <- list(adj.matrix=adj.matrix,edge.confidence.matrix=edge.confidence.matrix); + return(probability.raising); +} + +#' verify the temporal priority condition with bootstrap +#' @title verify.temporal.priority.do.boot +#' @param marginal.probs.distributions distributions of the bootstrapped marginal probabilities +#' @param pvalue minimum pvalue for the Mann-Whitney U tests to be significant +#' @param adj.matrix adjacency matrix of the topology +#' @param edge.confidence.matrix matrix of the confidence of each edge +#' @return temporal.priority: list describing the causes where temporal priority is verified +verify.temporal.priority.do.boot = function(marginal.probs.distributions, + pvalue, + adj.matrix, + edge.confidence.matrix ) + +{ + + # evalutate the temporal priority condition for each pair of edges + not.ordered = list(); + counter = 0; + for(i in 1:nrow(adj.matrix)) { + for(j in i:ncol(adj.matrix)) { + + # the diagonal (self cause) and the other invalid edges have not to be considered + if(adj.matrix[i,j]!=0 || adj.matrix[j,i]!=0) { + + # [i,j] refers to causation i --> j + # temporal priority condition: if P(i)>P(j) the edge i --> j is valid for temporal priority + # test i --> j + first.pvalue.i.j = wilcox.test(unlist(marginal.probs.distributions[i,1]),unlist(marginal.probs.distributions[j,1]),alternative="greater",mu=0)$p.value; + if(is.na(first.pvalue.i.j) || is.nan(first.pvalue.i.j)) { + # in this case the two distributions are exactly identical + first.pvalue.i.j = 1; + } + + # test j --> i + first.pvalue.j.i = wilcox.test(unlist(marginal.probs.distributions[j,1]),unlist(marginal.probs.distributions[i,1]),alternative="greater",mu=0)$p.value; + if(is.na(first.pvalue.j.i) || is.nan(first.pvalue.j.i)) { + # in this case the two distributions are exactly identical + first.pvalue.j.i = 1; + } + + # in this case i is before j and j --> i is not valid + if(first.pvalue.j.i>=pvalue && first.pvalue.i.j j is not valid + else if(first.pvalue.j.i=pvalue) { + # [i,j] = 0 means i is after j, i.e. it can not be causing j + adj.matrix[i,j] = 0; + } + # in this case, a total time order between i and j can not be defined + else { + # no temporal priority induced by the topology can be inferred + counter = counter + 1; + curr.not.ordered = array(-1, c(2,1)); + curr.not.ordered[1,1] = i; + curr.not.ordered[2,1] = j; + not.ordered[counter] = list(curr.not.ordered); + } + + # save the confidence for i --> j and j --> i + tmp = edge.confidence.matrix[[1,1]]; + tmp[i,j] = first.pvalue.i.j; + tmp[j,i] = first.pvalue.j.i; + edge.confidence.matrix[1,1] = list(tmp); + + } + else { + tmp = edge.confidence.matrix[[1,1]]; + tmp[i,j] = 1; + tmp[j,i] = 1; + edge.confidence.matrix[1,1] = list(tmp); + } + + } + } + + # save the results and return them + temporal.priority <- list(adj.matrix=adj.matrix,edge.confidence.matrix=edge.confidence.matrix,not.ordered=not.ordered); + return(temporal.priority); + +} + + +#' verify the temporal priority condition without bootstrap +#' @title verify.temporal.priority.no.boot +#' @param marginal.probs marginal probabilities +#' @param adj.matrix adjacency matrix of the topology +#' @param edge.confidence.matrix matrix of the confidence of each edge +#' @return temporal.priority: adjacency matrix where temporal priority is verified +verify.temporal.priority.no.boot = function(marginal.probs, + adj.matrix, + edge.confidence.matrix ) + +{ + + # evalutate the temporal priority condition for each pair of edges + not.ordered = list(); + counter = 0; + for(i in 1:nrow(adj.matrix)) { + for(j in i:ncol(adj.matrix)) { + + # the diagonal (self cause) and the other invalid edges have not to be considered + if(adj.matrix[i,j]!=0 || adj.matrix[j,i]!=0) { + + # [i,j] refers to causation i --> j + # temporal priority condition: if P(i)>P(j) the edge i --> j is valid for temporal priority + # in this case i is before j and j --> i is not valid + if(marginal.probs[i,1]>marginal.probs[j,1]) { + # [j,i] = 0 means j is after i, i.e. it can not be causing i + adj.matrix[j,i] = 0; + } + # in this case j is before i and i --> j is not valid + else if(marginal.probs[j,1]>marginal.probs[i,1]) { + # [i,j] = 0 means i is after j, i.e. it can not be causing j + adj.matrix[i,j] = 0; + } + # in this case, a total time order between i and j can not be defined + else { + # no temporal priority induced by the topology can be inferred + counter = counter + 1; + curr.not.ordered = array(-1, c(2,1)); + curr.not.ordered[1,1] = i; + curr.not.ordered[2,1] = j; + not.ordered[counter] = list(curr.not.ordered); + } + + # save the confidence for i --> j and j --> i + tmp = edge.confidence.matrix[[1,1]]; + tmp[i,j] = min(marginal.probs[j,1]/marginal.probs[i,1],1); + tmp[j,i] = min(marginal.probs[i,1]/marginal.probs[j,1],1); + edge.confidence.matrix[1,1] = list(tmp); + + } + else { + tmp = edge.confidence.matrix[[1,1]]; + tmp[i,j] = 1; + tmp[j,i] = 1; + edge.confidence.matrix[1,1] = list(tmp); + } + + } + } + + # save the results and return them + temporal.priority <- list(adj.matrix=adj.matrix,edge.confidence.matrix=edge.confidence.matrix,not.ordered=not.ordered); + return(temporal.priority); + +} diff --git a/R/capri.bootstrap.R b/R/capri.bootstrap.R new file mode 100644 index 00000000..58b98da7 --- /dev/null +++ b/R/capri.bootstrap.R @@ -0,0 +1,458 @@ +################################################################################## +# # +# TRONCO: a tool for TRanslational ONCOlogy # +# # +################################################################################## +# Copyright (c) 2015, Marco Antoniotti, Giulio Caravagna, Luca De Sano, # +# Alex Graudenzi, Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, # +# Giancarlo Mauri, Bud Mishra and Daniele Ramazzotti. # +# # +# All rights reserved. This program and the accompanying materials # +# are made available under the terms of the GNU GPL v3.0 # +# which accompanies this distribution # +# # +################################################################################## + + +#' perform non-parametric or parametric bootstrap to evalutate the confidence of the reconstruction +#' @title bootstrap.capri +#' @param dataset a dataset describing a progressive phenomenon +#' @param hypotheses a set of hypotheses referring to the dataset +#' @param command.capri type of search, either hill climbing (hc) or tabu (tabu) +#' @param regularization regularizators to be used for the likelihood fit +#' @param do.boot should I perform bootstrap? Yes if TRUE, no otherwise +#' @param nboot.capri integer number (greater than 0) of bootstrap sampling to be performed +#' @param pvalue pvalue for the tests (value between 0 and 1) +#' @param min.boot minimum number of bootstrapping to be performed +#' @param min.stat should I keep bootstrapping untill I have nboot valid values? +#' @param boot.seed seed to be used for the sampling +#' @param do.estimation should I perform the estimation of the error rates and probabilities? +#' @param silent should I be verbose? +#' @param reconstruction todo +#' @param command should I perform non-parametric or parametric bootstrap? +#' @param nboot number of bootstrap resampling to be performed +#' @param bootstrap.statistics todo +#' @param verbose todo +#' @return bootstrap.statistics: statistics of the bootstrap +bootstrap.capri = function(dataset, + hypotheses, + command.capri, + regularization, + do.boot, + nboot.capri, + pvalue, + min.boot, + min.stat, + boot.seed, + do.estimation, + silent, + reconstruction, + command = "non-parametric", + nboot = 100, + bootstrap.statistics = list(), + verbose = FALSE) +{ + + # start the clock to measure the execution time + + #library(doParallel) + + ptm <- proc.time(); + + # structure to save the results of the bootstrap + curr.bootstrap.results = array(list(-1), c(nboot,nevents(reconstruction))) + colnames(curr.bootstrap.results) = rownames(as.events(reconstruction)) + bootstrap.results = list() + bootstrap.results[names(as.models(reconstruction))] = list(curr.bootstrap.results) + + curr.bootstrap.adj.matrix = array(list(0), c(nevents(reconstruction)+1,nevents(reconstruction)+1)) + colnames(curr.bootstrap.adj.matrix) = c("None",rownames(as.events(reconstruction))) + rownames(curr.bootstrap.adj.matrix) = c("None",rownames(as.events(reconstruction))) + bootstrap.adj.matrix = list() + bootstrap.adj.matrix[names(as.models(reconstruction))] = list(curr.bootstrap.adj.matrix) + + bootstrap.adj.matrix.frequency = list() + bootstrap.adj.matrix.frequency[names(as.models(reconstruction))] = list(curr.bootstrap.adj.matrix) + + curr.edge.confidence = array(list(0), c(nevents(reconstruction),nevents(reconstruction))) + colnames(curr.edge.confidence) = rownames(as.events(reconstruction)) + rownames(curr.edge.confidence) = rownames(as.events(reconstruction)) + bootstrap.edge.confidence = list() + bootstrap.edge.confidence[names(as.models(reconstruction))] = list(curr.edge.confidence) + + overall.confidence = list() + overall.confidence[names(as.models(reconstruction))] = list(0) + overall.frequency = list() + overall.frequency[names(as.models(reconstruction))] = list(0) + + + + cores = detectCores() - 1 + if(cores < 1) { + cores = 1 + } + + expected.execution.time = round(((reconstruction$execution.time[3]*nboot)/(cores)),digits=0) + cat("Expected completion in approx.",format(.POSIXct(expected.execution.time,tz="GMT"),"%Hh:%Mm:%Ss"),"\n") + + if(!verbose) { + cl = makeCluster(cores) + } else { + cl = makeCluster(cores, outfile="") + } + + registerDoParallel(cl) + cat('*** Using', cores, 'cores via "parallel" \n') + + # perform nboot bootstrap resampling + #for (num in 1:nboot + + # if parametric bootstrap selected prepare input + + if(command == 'parametric') { + + # structure to save the samples probabilities + samples.probabilities = list(); + + # define the possible samples given the current number of events + possible.strings = 2 ^ ncol(dataset) + + err = "" + message = "Too many events in the dataset! Parametric bootstrastap can not be performed." + err = tryCatch({ + curr.dataset = suppressWarnings(array(0, c(possible.strings, ncol(dataset)))) + }, error = function(e) { + err <- message + }) + + + if(toString(err) == message) { + stop(err, call. = FALSE) + } + + for (i in 1:possible.strings) { + curr.dataset[i, ] = decimal.to.binary.dag(i - 1, ncol(dataset)) + } + + colnames(curr.dataset) = colnames(dataset) + + for (m in names(as.models(reconstruction))) { + + # estimate the samples probabilities for each model + samples.probabilities[m] = list(estimate.dag.samples(curr.dataset, + as.adj.matrix(reconstruction,model=m)[[m]], + as.marginal.probs(reconstruction,model=m,type="fit")[[m]], + as.conditional.probs(reconstruction,model=m,type="fit")[[m]], + as.parents.pos(reconstruction,model=m)[[m]], + as.error.rates(reconstruction,model=m)[[m]])) + + } + + } + + # r = foreach(num = 1:nboot, .export =ls(env = .GlobalEnv) ) %dopar% { + r = foreach(num = 1:nboot ) %dopar% { + curr.iteration = num + + # performed the bootstrapping procedure + if(command == "non-parametric") { + + # perform the sampling for the current step of bootstrap + samples = sample(1:nrow(dataset), size = nrow(dataset), replace = TRUE) + bootstrapped.dataset = dataset[samples,] + + curr.reconstruction = list() + curr.reconstruction$genotypes = bootstrapped.dataset + curr.reconstruction$annotations = reconstruction$annotations + curr.reconstruction$types = reconstruction$types + curr.reconstruction$hypotheses = hypotheses + + # perform the reconstruction on the bootstrapped dataset + bootstrapped.topology = tronco.capri(curr.reconstruction, + command.capri, + regularization, + do.boot, + nboot.capri, + pvalue, + min.boot, + min.stat, + boot.seed, + do.estimation, + silent) + + curr.reconstruction = bootstrapped.topology; + + } else if(command=="parametric") { + + # perform the reconstruction for each model + new.reconstruction = reconstruction; + new.reconstruction$model = list(); + for (m in names(as.models(reconstruction))) { + # perform the sampling for the current step of bootstrap and regularizator + samples = sample(1:nrow(curr.dataset), + size = nrow(dataset), + replace = TRUE, + prob = samples.probabilities[[m]]) + + bootstrapped.dataset = curr.dataset[samples,] + + curr.reconstruction = list() + curr.reconstruction$genotypes = bootstrapped.dataset + curr.reconstruction$annotations = reconstruction$annotations + curr.reconstruction$types = reconstruction$types + curr.reconstruction$hypotheses = hypotheses + + # perform the reconstruction on the bootstrapped dataset + bootstrapped.topology = tronco.capri(curr.reconstruction, + command.capri, + m, + do.boot, + nboot.capri, + pvalue, + min.boot, + min.stat, + boot.seed, + do.estimation, + silent) + + # save the results for this model + new.reconstruction$model[m] = as.models(bootstrapped.topology,models=m) + + } + curr.reconstruction = new.reconstruction; + + } else if(command == "statistical") { + + curr.reconstruction = list() + curr.reconstruction$genotypes = reconstruction$genotypes; + curr.reconstruction$annotations = reconstruction$annotations; + curr.reconstruction$types = reconstruction$types; + curr.reconstruction$hypotheses = hypotheses; + + # perform the reconstruction on the bootstrapped dataset + bootstrapped.topology = tronco.capri(curr.reconstruction, + command.capri, + regularization, + do.boot, + nboot.capri, + pvalue, + min.boot, + min.stat, + boot.seed, + do.estimation, + silent) + + curr.reconstruction = bootstrapped.topology; + + } + + # set the reconstructed selective advantage edges + bootstrap.results = list() + for (m in names(as.models(curr.reconstruction))) { + + # get the parents pos + parents.pos = array(list(), c(nevents(curr.reconstruction), 1)) + + + curr.adj.matrix = as.adj.matrix(curr.reconstruction,model=m)[[m]] + for(i in 1:nevents(curr.reconstruction)) { + for(j in 1:nevents(curr.reconstruction)) { + if(i!=j && curr.adj.matrix[i,j]==1) { + parents.pos[j, 1] = list(c(unlist(parents.pos[j,1]),i)) + } + } + } + + parents.pos[unlist(lapply(parents.pos,is.null))] = list(-1) + + # save the results + bootstrap.results[[m]] = t(parents.pos); + + + } + cat("\nBootstrap iteration", curr.iteration, "performed") + bootstrap.results + } + + stopCluster(cl) + cat("\n*** Reducing results\n") + + for (m in names(bootstrap.results)) { + y = Reduce(rbind, lapply(r, function(z, type){get(type, z)}, type=m)) + bootstrap.results[[m]] = y + } + + + # set the statistics of the bootstrap + for (m in names(as.models(reconstruction))) { + + curr.bootstrap.adj.matrix = bootstrap.adj.matrix[[m]] + + for(i in 2:ncol(curr.bootstrap.adj.matrix)) { + + curr.result = bootstrap.results[[m]][,i-1] + + for(j in 1:length(curr.result)) { + + curr.val = curr.result[[j]] + + for(k in 1:length(curr.val)) { + if(length(curr.val[k])==1 && curr.val[k] == -1) { + curr.bootstrap.adj.matrix[[1,i]] = curr.bootstrap.adj.matrix[[1,i]] + 1 + } else { + curr.bootstrap.adj.matrix[[curr.val[k] + 1, i]] = curr.bootstrap.adj.matrix[[curr.val[k] + 1, i]] + 1 + } + } + + } + + } + + bootstrap.adj.matrix[[m]] = curr.bootstrap.adj.matrix; + rownames(bootstrap.results[[m]]) = paste("Iteration ",1:nrow(bootstrap.results[[m]]),sep="") + + } + + # evalutate the overall confidence + for (m in names(as.models(reconstruction))) { + + curr.bootstrap.results = bootstrap.results[[m]] + + for(i in 1:nrow(curr.bootstrap.results)) { + + curr.adj.matrix = array(0, c(nevents(reconstruction),nevents(reconstruction))) + + for(j in 1:ncol(curr.bootstrap.results)) { + + curr.result = curr.bootstrap.results[i, j] + + for(k in 1:length(curr.result)) { + + curr.val = curr.result[[k]] + + for(l in 1:length(curr.val)) { + if(length(curr.val[l])>1 || curr.val[l] != -1) { + curr.adj.matrix[curr.val[l], j] = 1 + } + } + + } + + } + + # if I have a perfect match between the reconstructed topologies, increase the count + reconstructed.topology = as.adj.matrix(reconstruction,model=m)[[m]] + flag = TRUE; + + for (j in 1:nrow(reconstructed.topology)) { + for (k in 1:ncol(reconstructed.topology)) { + if(reconstructed.topology[j,k]!=curr.adj.matrix[j,k]) { + flag = FALSE + next() + } + } + } + + if(flag==TRUE) { + overall.confidence[[m]] = overall.confidence[[m]] + 1 + overall.frequency[[m]] = overall.confidence[[m]] / nboot + } + + } + + } + + # save the edge confidence and the frequency of the bootstrap adj.matrix + for (m in names(as.models(reconstruction))) { + + curr.adj.matrix = as.adj.matrix(reconstruction,model=m)[[m]]; + + # save the edge confidence + curr.bootstrap.matrix = bootstrap.adj.matrix[[m]][-1,-1]; + curr.edge.confidence = array(0,c(ncol(curr.bootstrap.matrix),nrow(curr.bootstrap.matrix))) + colnames(curr.edge.confidence) = colnames(curr.bootstrap.matrix); + rownames(curr.edge.confidence) = rownames(curr.bootstrap.matrix); + for (i in 1:ncol(curr.bootstrap.matrix)) { + for (j in 1:nrow(curr.bootstrap.matrix)) { + curr.edge.confidence[i,j] = (curr.adj.matrix[i,j]*as.numeric(curr.bootstrap.matrix[i,j]))/nboot + } + } + bootstrap.edge.confidence[[m]] = curr.edge.confidence + + # save the frequency of the bootstrap adj.matrix + curr.bootstrap.matrix = bootstrap.adj.matrix[[m]]; + curr.adj.matrix.frequency = array(0,c(ncol(curr.bootstrap.matrix),nrow(curr.bootstrap.matrix))) + colnames(curr.adj.matrix.frequency) = colnames(curr.bootstrap.matrix); + rownames(curr.adj.matrix.frequency) = rownames(curr.bootstrap.matrix); + for (i in 1:ncol(curr.bootstrap.matrix)) { + for (j in 1:nrow(curr.bootstrap.matrix)) { + curr.adj.matrix.frequency[i,j] = as.numeric(as.numeric(curr.bootstrap.matrix[i,j]))/nboot + } + } + bootstrap.adj.matrix.frequency[[m]] = curr.adj.matrix.frequency + + } + + # save the statistics of the bootstrap + for (m in names(as.models(reconstruction))) { + if(command == "non-parametric") { + bootstrap.statistics[[m]]$npb$bootstrap.results = bootstrap.results[[m]] + bootstrap.statistics[[m]]$npb$bootstrap.adj.matrix = list(count = bootstrap.adj.matrix[[m]], frequency = bootstrap.adj.matrix.frequency[[m]]) + bootstrap.statistics[[m]]$npb$bootstrap.edge.confidence = bootstrap.edge.confidence[[m]] + bootstrap.statistics[[m]]$npb$overall.confidence = list(count = overall.confidence[[m]], frequency = overall.frequency[[m]]) + bootstrap.statistics[[m]]$npb$bootstrap.settings = list(type = command, nboot = nboot) + } + else if(command == "parametric") { + bootstrap.statistics[[m]]$pb$bootstrap.results = bootstrap.results[[m]] + bootstrap.statistics[[m]]$pb$bootstrap.adj.matrix = list(count = bootstrap.adj.matrix[[m]], frequency = bootstrap.adj.matrix.frequency[[m]]) + bootstrap.statistics[[m]]$pb$bootstrap.edge.confidence = bootstrap.edge.confidence[[m]] + bootstrap.statistics[[m]]$pb$overall.confidence = list(count = overall.confidence[[m]], frequency = overall.frequency[[m]]) + bootstrap.statistics[[m]]$pb$bootstrap.settings = list(type = command, nboot = nboot) + } + else if(command == "statistical") { + bootstrap.statistics[[m]]$sb$bootstrap.results = bootstrap.results[[m]] + bootstrap.statistics[[m]]$sb$bootstrap.adj.matrix = list(count = bootstrap.adj.matrix[[m]], frequency = bootstrap.adj.matrix.frequency[[m]]) + bootstrap.statistics[[m]]$sb$bootstrap.edge.confidence = bootstrap.edge.confidence[[m]] + bootstrap.statistics[[m]]$sb$overall.confidence = list(count = overall.confidence[[m]], frequency = overall.frequency[[m]]) + bootstrap.statistics[[m]]$sb$bootstrap.settings = list(type = command, nboot = nboot) + } + } + + # save the execution time of the bootstrap + if(command == "non-parametric") { + bootstrap.statistics$npb$execution.time=(proc.time()-ptm) + } + else if(command == "parametric") { + bootstrap.statistics$pb$execution.time=(proc.time()-ptm) + } + else if(command == "statistical") { + bootstrap.statistics$sb$execution.time=(proc.time()-ptm) + } + + return(bootstrap.statistics) + +} + +#' convert an integer decimal number to binary +#' @title decimal.to.binary.dag +#' @param num.decimal decimal integer to be converted +#' @param num.bits number of bits to be used +#' @return num.binary: binary conversion of num.decimal +decimal.to.binary.dag = function(num.decimal, num.bits) { + + #structure where to save the result + num.binary = rep(0, num.bits) + + #convert the integer decimal number to binary + pos = 0 + while(num.decimal > 0) { + + #compute the value of the current step + num.binary[num.bits-pos] = num.decimal %% 2; + + #divide the number by 2 for the next iteration + num.decimal = num.decimal %/% 2 + pos = pos + 1 + } + return(num.binary) +} diff --git a/R/capri.estimation.R b/R/capri.estimation.R new file mode 100644 index 00000000..4d0d138e --- /dev/null +++ b/R/capri.estimation.R @@ -0,0 +1,610 @@ +################################################################################## +# # +# TRONCO: a tool for TRanslational ONCOlogy # +# # +################################################################################## +# Copyright (c) 2015, Marco Antoniotti, Giulio Caravagna, Luca De Sano, # +# Alex Graudenzi, Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, # +# Giancarlo Mauri, Bud Mishra and Daniele Ramazzotti. # +# # +# All rights reserved. This program and the accompanying materials # +# are made available under the terms of the GNU GPL v3.0 # +# which accompanies this distribution # +# # +################################################################################## + + +#' enumerate all the paths between two nodes of a DAG +#' @title enumerate.all.paths +#' @param ancestor.node first node of the path +#' @param child.node last node of the path +#' @param parents.pos topological connections +#' @return all.paths: vector of all the paths +enumerate.all.paths = function( ancestor.node, child.node, parents.pos ) { + + # set the initial parents set + all.paths = parents.pos[[child.node]]; + if(length(all.paths)==1 && all.paths==-1) { + is.done = TRUE; + } + else { + is.done = FALSE; + } + + # visit all the nodes in topological order + while(is.done==FALSE) { + curr.paths = vector(); + is.done = TRUE; + for (i in 1:length(all.paths)) { + curr.new.path = all.paths[[i]]; + curr.new.parents = parents.pos[[curr.new.path[1]]]; + if(length(curr.new.parents)>1 || curr.new.parents!=-1) { + is.done = FALSE; + for (j in 1:length(curr.new.parents)) { + curr.paths[length(curr.paths)+1] = list(c(curr.new.parents[j],curr.new.path)); + } + } + else { + curr.paths[length(curr.paths)+1] = list(curr.new.path); + } + } + all.paths = curr.paths; + } + + # remove all the paths that are not visiting ancestor.node + curr.paths = vector(); + for (i in 1:length(all.paths)) { + curr.result = which(all.paths[[i]]%in%ancestor.node); + if(length(curr.result)>0) { + curr.new.path = all.paths[[i]]; + curr.paths[length(curr.paths)+1] = list(c(curr.new.path[which(all.paths[[i]]%in%ancestor.node):length(curr.new.path)],child.node)); + } + } + + all.paths = unique(curr.paths); + return(all.paths); + +} + +#' estimate the error rates by "L-BFGS-B" optimization in terms of L2-error +#' @title estimate.dag.error.rates +#' @param dataset a valid dataset +#' @param marginal.probs marginal probabilities +#' @param joint.probs joint probabilities +#' @param parents.pos which event is the parent? 0 if none, a number otherwise +#' @return estimated.error.rates: estimated probabilities, false positive and false negative error rates +estimate.dag.error.rates = function( dataset, marginal.probs, joint.probs, parents.pos ) { + + # function to be optimized by "L-BFGS-B" optimization in terms of L2-error + f.estimation <- function(errors) { + # set the current error rates with the starting point of the optimization being (e_pos,e_neg) = 0 + error.rates = list(error.fp=errors[1],error.fn=errors[2]); + # estimate the observed probabilities given the error rates + estimated.probs = estimate.dag.probs(dataset,marginal.probs,joint.probs,parents.pos,error.rates); + # evaluate the goodness of the estimatione by L2-error on the estimated marginal and joint probabilities + error.estimation = sum((marginal.probs-estimated.probs$marginal.probs)^2)+sum((joint.probs-estimated.probs$joint.probs)^2); + return(error.estimation); + } + + # the estimation is performed as in Byrd et al (1995) + # this method allows for box constraints, i.e., each variable can be given a lower and/or upper bound + estimated.error.rates = optim(c(0.00,0.00),f.estimation,method="L-BFGS-B",lower=c(0.00,0.00),upper=c(0.49,0.49))$par; + # structure to save the results + estimated.error.rates = list(error.fp=estimated.error.rates[1],error.fn=estimated.error.rates[2]); + + return(estimated.error.rates); + +} + +#' estimate the theoretical joint probability of two given nodes given the reconstructed topology +#' @title estimate.dag.joint.probs +#' @param first.node first node +#' @param second.node second node +#' @param parents.pos which event is the parent? -1 if none, a list otherwise +#' @param marginal.probs marginal probabilities +#' @param conditional.probs conditional probabilities +#' @return estimated.dag.joint.probs: estimated theoretical joint probability +estimate.dag.joint.probs = function(first.node, + second.node, + parents.pos, + marginal.probs, + conditional.probs ) + +{ + + # if the two nodes are roots + if((length(parents.pos[[first.node]])==1 && parents.pos[[first.node]]==-1) && (length(parents.pos[[second.node]])==1 && parents.pos[[second.node]]==-1)) { + estimated.dag.joint.probs = marginal.probs[first.node,1]*marginal.probs[second.node,1]; + } + # if the two nodes are the same node + else if(first.node==second.node) { + estimated.dag.joint.probs = marginal.probs[first.node,1]; + } + # otherwise + else { + # go through the parents starting from the two nodes to find if they are directly connected + # are the two nodes in the same path? + is.path = 0; + # check if first.node is an ancestor of second.node + curr.first = first.node; + curr.second = second.node; + while(length(unlist(parents.pos[curr.second]))>0 && (length(unlist(parents.pos[curr.second]))>1 || unlist(parents.pos[curr.second])!=-1)) { + curr.result = which(curr.first%in%curr.second); + if(length(curr.result)>0) { + is.path = 1; + is.child = second.node; + break; + } + curr.second = unique(unlist(parents.pos[curr.second])); + curr.second = curr.second[which(curr.second!=-1)]; + } + if(is.path==0) { + # check if second.node is an ancestor of first.node + curr.first = first.node; + curr.second = second.node; + while(length(unlist(parents.pos[curr.first]))>0 && (length(unlist(parents.pos[curr.first]))>1 || unlist(parents.pos[curr.first])!=-1)) { + curr.result = which(curr.first%in%curr.second); + if(length(curr.result)>0) { + is.path = 1; + is.child = first.node; + break; + } + curr.first = unique(unlist(parents.pos[curr.first])); + curr.first = curr.first[which(curr.first!=-1)]; + } + } + # check if the two nodes are at least connected + # are the two nodes connected? + is.connected = 0; + if(is.path==0) { + curr.first = first.node; + curr.second = second.node; + while(length(unlist(parents.pos[curr.first]))>0 && (length(unlist(parents.pos[curr.first]))>1 || unlist(parents.pos[curr.first])!=-1)) { + while(length(unlist(parents.pos[curr.second]))>0 && (length(unlist(parents.pos[curr.second]))>1 || unlist(parents.pos[curr.second])!=-1)) { + curr.result = which(curr.first%in%curr.second); + if(length(curr.result)>0) { + is.connected = 1; + is.ancestor = curr.result; + break; + } + else { + curr.second = unique(unlist(parents.pos[curr.second])); + curr.second = curr.second[which(curr.second!=-1)]; + } + } + if(is.connected==0) { + curr.first = unique(unlist(parents.pos[curr.first])); + curr.first = curr.first[which(curr.first!=-1)]; + curr.second = second.node; + } + else { + break; + } + } + } + # now I can set the joint probabilities + # in this case the two nodes are directly connected + # P(child,parent)_estimate = P(child); + if(is.path==1) { + estimated.dag.joint.probs = marginal.probs[is.child,1]; + } + # in this case the two nodes are indirectly connected + # P(i,j)_estimate = P(ancestor)_estimate * P_PATH(ancestor->first.node)_estimate * P_PATH(ancestor->second.node)_estimate + else if(is.connected==1) { + # as heuristic, if I have multiple common ancestors, I choose the later one to estimate the joint probability + is.ancestor = is.ancestor[which.min(marginal.probs[is.ancestor,1])]; + # P(ancestor)_estimate + estimated.dag.joint.probs = marginal.probs[is.ancestor,1]; + # P_PATH(ancestor->first.node)_estimate + all.first.paths = enumerate.all.paths(is.ancestor,first.node,parents.pos); + first.path = rep(1,length(all.first.paths)); + for (i in 1:length(all.first.paths)) { + curr.new.path = all.first.paths[[i]]; + if(length(curr.new.path)>0) { + for (j in 2:length(curr.new.path)) { + curr.parents = parents.pos[[curr.new.path[j-1]]]; + curr.conditional.probs = conditional.probs[[curr.new.path[j],1]]; + first.path[i] = first.path[i] * curr.conditional.probs; + } + } + } + # as heuristic, if I have multiple paths, I average the probability of each path + first.path = mean(first.path); + # P_PATH(ancestor->second.node)_estimate + all.second.paths = enumerate.all.paths(is.ancestor,second.node,parents.pos); + second.path = rep(1,length(all.second.paths)); + for (i in 1:length(all.second.paths)) { + curr.new.path = all.second.paths[[i]]; + if(length(curr.new.path)>0) { + for (j in 2:length(curr.new.path)) { + curr.parents = parents.pos[[curr.new.path[j-1]]]; + curr.conditional.probs = conditional.probs[[curr.new.path[j],1]]; + second.path[i] = second.path[i] * curr.conditional.probs; + } + } + } + # as heuristic, if I have multiple paths, I average the probability of each path + second.path = mean(second.path); + estimated.dag.joint.probs = estimated.dag.joint.probs * first.path * second.path; + } + # in this case the two nodes are not connected + # P(i,j)_estimate = P(i)_estimate * P(j)_estimate + else { + estimated.dag.joint.probs = marginal.probs[first.node,1]*marginal.probs[second.node,1]; + } + } + + return(estimated.dag.joint.probs); + +} + +#' estimate the marginal, joint and conditional probabilities given the reconstructed topology and the error rates +#' @title estimate.dag.probs +#' @param dataset a valid dataset +#' @param marginal.probs observed marginal probabilities +#' @param joint.probs observed joint probabilities +#' @param parents.pos position of the parents in the list of nodes +#' @param error.rates rates for the false positive and the false negative errors +#' @return estimated.probs: estimated marginal, joint and conditional probabilities +estimate.dag.probs = function( dataset, marginal.probs, joint.probs, parents.pos, error.rates ) { + + # structure where to save the probabilities to be estimated + estimated.marginal.probs = array(-1, dim=c(nrow(marginal.probs),1)); + estimated.joint.probs = array(-1, dim=c(nrow(marginal.probs),nrow(marginal.probs))); + estimated.conditional.probs = parents.pos; + + # compute the probability of the AND parents + parents.probs = array(0,c(nrow(parents.pos),1)); + children.probs = array(0,c(nrow(parents.pos),1)); + last.parent.pos = array(-1,c(nrow(parents.pos),1)); + for (i in 1:nrow(dataset)) { + for (j in 1:length(parents.pos)) { + if((length(parents.pos[[j,1]])==1 && parents.pos[[j,1]]==-1)||(sum(dataset[i,parents.pos[[j,1]]])==length(parents.pos[[j,1]]))) { + parents.probs[j,1] = parents.probs[j,1] + 1; + } + if(length(parents.pos[[j,1]])!=1 || parents.pos[[j,1]]!=-1) { + curr.last.parent = which.min(marginal.probs[parents.pos[[j,1]],1]); + last.parent.pos[j,1] = curr.last.parent[1]; + } + if((length(parents.pos[[j,1]])==1 && parents.pos[[j,1]]==-1 && dataset[i,j]==1)||((parents.pos[[j,1]]!=-1) && sum(dataset[i,c(parents.pos[[j,1]],j)])==length(c(parents.pos[[j,1]],j)))) { + children.probs[j,1] = children.probs[j,1] + 1; + } + } + } + parents.probs = parents.probs/nrow(dataset); + children.probs = children.probs/nrow(dataset); + + # estimate the theoretical conditional probabilities given the error rates + # this estimation is performed by applying the error rates to the marginal and joint probabilities + theoretical.conditional.probs = array(-1, dim=c(nrow(marginal.probs),1)); + for (i in 1:nrow(theoretical.conditional.probs)) { + # if the node has a parent, use the error rates to compute the conditional probability + # if the node has no parent, its conditional probability is not considered + if(parents.probs[i,1]!=1) { + # P(i|j)_theoretical = ((P(i,j)_obs-e_p*(P(j)_obs+P(i)_obs)+e_p^2)/(1-e_n-e_p)^2)/((P(j)_obs-e_p)/(1-e_n-e_p)) + theoretical.conditional.probs[i,1] = (children.probs[i,1]-error.rates$error.fp*(parents.probs[i,1]+marginal.probs[i,1])+error.rates$error.fp^2)/((parents.probs[i,1]-error.rates$error.fp)*(1-error.rates$error.fn-error.rates$error.fp)); + if(theoretical.conditional.probs[i,1]<0 || theoretical.conditional.probs[i,1]>1) { + # invalid theoretical conditional probability + if(theoretical.conditional.probs[i,1]<0) { + theoretical.conditional.probs[i,1] = 0; + } + else { + theoretical.conditional.probs[i,1] = 1; + } + } + } + } + + # estimate the marginal observed probabilities + # this estimation is performed by applying the topological constraints on the probabilities and then the error rates + # I do not have any constraint on the nodes without a parent + child.list <- which(parents.probs==1); + estimated.marginal.probs[child.list,1] = marginal.probs[child.list,1]; + estimated.marginal.probs.with.error = array(-1, dim=c(nrow(marginal.probs),1)); + estimated.marginal.probs.with.error[child.list,1] = estimated.marginal.probs[child.list,1]; + visited = length(child.list); + # I do not have any constraint for the joint probabilities on the pair of nodes which are the roots of the dag + estimated.joint = array(0, dim=c(nrow(marginal.probs),nrow(marginal.probs))); + for (i in child.list) { + for (j in child.list) { + if(i!=j) { + estimated.joint.probs[i,j] = joint.probs[i,j]; + estimated.joint[i,j] = -1; + } + } + } + + # visit the nodes with a parent in topological order + while (visited < nrow(estimated.marginal.probs)) { + # set the new child list + new.child = vector(); + # go through the current parents + for (node in child.list) { + # set the new children + curr.child <- which(last.parent.pos==node); + # go through the current children + for (child in curr.child) { + # set the marginal probability for this node + # P(child)_estimate = P(parent)_estimate * P(child|parent)_theoretical + estimated.marginal.probs[child,1] = estimated.marginal.probs[last.parent.pos[child,1],1]*theoretical.conditional.probs[child,1]; + visited = visited + 1; + # P(child,parent)_estimare = P(child)_estimate; + estimated.joint.probs[child,last.parent.pos[child,1]] = estimated.marginal.probs[child,1]; + estimated.joint[child,last.parent.pos[child,1]] = 1; + estimated.joint.probs[last.parent.pos[child,1],child] = estimated.marginal.probs[child,1]; + estimated.joint[last.parent.pos[child,1],child] = 1; + # apply the error rates to the marginal probabilities + # P(i)_obs_estimate = P(i)_estimate*(1-e_n) + P(not i)_estimate*e_p + estimated.marginal.probs.with.error[child,1] = error.rates$error.fp+(1-error.rates$error.fn-error.rates$error.fp)*estimated.marginal.probs[child,1]; + if(estimated.marginal.probs.with.error[child,1]<0 || estimated.marginal.probs.with.error[child,1]>1) { + # invalid estimated observed probability + if(estimated.marginal.probs.with.error[child,1]<0) { + estimated.marginal.probs.with.error[child,1] = 0; + } + else { + estimated.marginal.probs.with.error[child,1] = 1; + } + } + } + new.child <- c(new.child,curr.child); + } + # set the next child list + child.list = new.child; + } + diag(estimated.joint.probs) = estimated.marginal.probs; + diag(estimated.joint) = -1; + + # given the estimated observed probabilities, I can now also estimate the joint probabilities by applying the topological constraints and then the error rates + for (i in 1:nrow(estimated.joint.probs)) { + for (j in i:nrow(estimated.joint.probs)) { + # if I still need to estimate this joint probability + if(estimated.joint[i,j]==0) { + estimated.joint.probs[i,j] = estimate.dag.joint.probs(i,j,parents.pos,estimated.marginal.probs,theoretical.conditional.probs); + estimated.joint[i,j] = 1; + } + # now I can apply the error rates to estimate the observed joint probabilities + if(estimated.joint[i,j]==1) { + # P(i,j)_obs_estimate = P(i,j)_estimate*(1-e_n)^2+P(not i,j)_estimate*e_p*(1-e_n)+P(i,not j)_estimate*(1-e_n)*e_p+P(not i,not j)_estimate*e_p^2; + estimated.joint.probs[i,j] = estimated.joint.probs[i,j]*((1-error.rates$error.fn-error.rates$error.fp)^2)+error.rates$error.fp*(estimated.marginal.probs[i,1]+estimated.marginal.probs[j,1])-error.rates$error.fp^2; + # invalid estimated joint probability + if(estimated.joint.probs[i,j]<0 || estimated.joint.probs[i,j]>min(estimated.marginal.probs.with.error[i,1],estimated.marginal.probs.with.error[j,1])) { + if(estimated.joint.probs[i,j]<0) { + estimated.joint.probs[i,j] = 0; + } + else { + estimated.joint.probs[i,j] = min(estimated.marginal.probs.with.error[i,1],estimated.marginal.probs.with.error[j,1]); + } + } + estimated.joint.probs[j,i] = estimated.joint.probs[i,j]; + } + } + } + # save the estimated probabilities + estimated.marginal.probs = estimated.marginal.probs.with.error; + + # given the estimated observed and joint probabilities, I can finally compute the conditional probabilities + # P(child|parent)_obs_estimate = P(parent,child)_obs_estimate/P(parent)_obs_estimate + for (i in 1:length(estimated.conditional.probs)) { + curr.parents.pos = parents.pos[[i,1]]; + for (j in 1:length(parents.pos[[i,1]])) { + # if the node has no parent, its conditional probability is set to 1 + if(length(curr.parents.pos)==1 && curr.parents.pos==-1) { + curr.parents.pos = 1; + break; + } + else { + if(estimated.marginal.probs[curr.parents.pos[j],1]>0) { + curr.parents.pos[j] = estimated.joint.probs[curr.parents.pos[j],i]/estimated.marginal.probs[curr.parents.pos[j],1]; + } + else { + curr.parents.pos[j] = 0; + } + } + } + estimated.conditional.probs[[i,1]] = curr.parents.pos; + } + + # structure to save the results + estimated.probs = list(marginal.probs=estimated.marginal.probs,joint.probs=estimated.joint.probs,conditional.probs=estimated.conditional.probs); + return(estimated.probs); + +} + +#' estimate the probability of observing each sample in the dataset given the reconstructed topology +#' @title estimate.dag.samples +#' @param dataset a valid dataset +#' @param reconstructed.topology the reconstructed topology +#' @param estimated.marginal.probabilities estimated marginal probabilities of the events +#' @param estimated.conditional.probabilities estimated conditional probabilities of the events +#' @param parents.pos position of the parents of each node +#' @param error.rates error rates for false positives and false negatives +#' @return probabilities: probability of each sample +estimate.dag.samples = function(dataset, + reconstructed.topology, + estimated.marginal.probabilities, + estimated.conditional.probabilities, + parents.pos, error.rates) +{ + + # structure where to save the probabilities of the samples + probabilities = array(-1, c(nrow(dataset), 1)) + + # compute the position of the latest parent and its conditional probability for each node + last.parent.pos = array(-1, c(nrow(parents.pos), 1)) + curr.estimated.conditional.probabilities = array(1, c(nrow(estimated.conditional.probabilities), 1)) + + for (i in 1:length(parents.pos)) { + if(length(parents.pos[[i, 1]]) != 1 || parents.pos[[i, 1]] != -1) { + curr.last.parent = which.min(estimated.marginal.probabilities[parents.pos[[i, 1]], 1]) + last.parent.pos[i, 1] = parents.pos[[i, 1]][curr.last.parent[1]] + curr.estimated.conditional.probabilities[i, 1] = estimated.conditional.probabilities[[i, 1]][curr.last.parent[1]] + } + } + + # topological properties: + # 1. progression number + # 2. latest parent + # 3. level in the progression + + topology.structure = array(0, c(nrow(reconstructed.topology), 3)) + + # go through the subtrees within the topology + progression.count = 0 + + for (i in 1:nrow(reconstructed.topology)) { + + # if node i has no parents, it is a root + if(length(which(reconstructed.topology[, i] == 1)) == 0) { + progression.count = progression.count + 1 + level = 1 + + # set the parameters for the root + topology.structure[i,1] = progression.count + topology.structure[i,2] = -1 + topology.structure[i,3] = level + curr.node = i + + # go through this progression + while (length(curr.node) > 0) { + + # move to the next level + level = level + 1 + new.node = vector() + + for (j in 1:length(curr.node)) { + curr.new.node = which(reconstructed.topology[curr.node[j], ] == 1) + + if(length(curr.new.node) > 0) { + new.node = c(new.node,curr.new.node) + + for (k in 1:length(curr.new.node)) { + + # number of the current subprogression + topology.structure[curr.new.node[k], 1] = progression.count + + # parent of the current node + if(last.parent.pos[curr.new.node[k], 1] == curr.node[j]) { + topology.structure[curr.new.node[k], 2] = curr.node[j] + } + + # level of this node + topology.structure[curr.new.node[k], 3] = level + } + } + } + curr.node = new.node + } + } + } + + # go through the dataset and evalutate the probability of each sample + for (i in 1:nrow(dataset)) { + sample.probability = 1 + + for (j in 1:progression.count) { + + # probability of this subprogression (without any knowledge, I set it to 1) + curr.sample.probability = 1 + + # entries referring to this subprogression + curr.entry = which(topology.structure[, 1] == j) + + # samples of each element of this subprogression + curr.sample = dataset[i, curr.entry] + + # parents of each element of this subprogression + curr.parents = topology.structure[curr.entry, 2] + + # level of each element of this subprogression + curr.levels = topology.structure[curr.entry, 3] + + # set the probability as the one of the root of this progression + curr.sample.probability = curr.sample.probability * estimated.marginal.probabilities[curr.entry[which(curr.levels == 1, arr.ind = TRUE)], 1] + + # set the maximum level of this subprogression + max.level = curr.levels[which.max(curr.levels)] + + # if I have at least one event in this sample + if(length(curr.sample[curr.sample == 1]) > 0) { + + # visit the nodes starting from the lower level + is.valid = TRUE + + for (k in max.level:1) { + curr.level.nodes = which(curr.levels == k, arr.ind=TRUE) + + # if I'm not on a root + if(k > 1) { + curr.level.samples = curr.sample[curr.level.nodes] + + # if I have at least one event at this level + if(length(curr.level.samples[curr.level.samples == 1]) > 0) { + + # I can not have a child without its parent + curr.level.parent = curr.parents[curr.level.nodes] + + for (p in 1:length(curr.level.parent)) { + if(dataset[i, curr.level.parent[p]] == 0 && dataset[i, curr.entry[curr.level.nodes[p]]] == 1) { + is.valid = FALSE + break + } + } + } + + # if the sample is valid + if(is.valid == TRUE) { + + # add the probability of each edge + curr.level.parent = curr.parents[curr.level.nodes] + + for (p in 1:length(curr.level.parent)) { + if(dataset[i, curr.level.parent[p]] == 1 && dataset[i,curr.entry[curr.level.nodes[p]]] == 0) { + curr.sample.probability = curr.sample.probability * (1 - curr.estimated.conditional.probabilities[curr.entry[curr.level.nodes[p]], 1]) + } else if(dataset[i, curr.level.parent[p]]==1 && dataset[i, curr.entry[curr.level.nodes[p]]] == 1) { + curr.sample.probability = curr.sample.probability * curr.estimated.conditional.probabilities[curr.entry[curr.level.nodes[p]], 1] + } + } + } + } + + if(is.valid == FALSE) { + curr.sample.probability = 0 + break + } + } + if(is.valid == FALSE) { + sample.probability = 0 + break + } + } else { + # ..if this sample has no events for this progression + curr.sample.probability = 1 - curr.sample.probability + } + + # update the probability of the topology with the one of this sample + sample.probability = sample.probability * curr.sample.probability + + if(sample.probability == 0) { + break + } + } + probabilities[i, 1] = sample.probability + } + + # correct the estimation by the error rates + errors.matrix = array(0, c(nrow(probabilities), nrow(dataset))) + for (i in 1:nrow(probabilities)) { + for (j in 1:nrow(dataset)) { + curr.sample.x = as.numeric(dataset[i, ]) + curr.sample.y = as.numeric(dataset[j, ]) + errors.matrix[i, j] = (1 - error.rates$error.fp) ^ ((1 - curr.sample.x) %*% (1 - curr.sample.y)) * + error.rates$error.fp ^ ((1 - curr.sample.x) %*% curr.sample.y) * + (1 - error.rates$error.fn) ^ (curr.sample.x %*% curr.sample.y) * + error.rates$error.fn ^ (curr.sample.x %*% (1 - curr.sample.y)) + } + } + + probabilities[, 1] = as.numeric(as.vector(probabilities) %*% errors.matrix) + return(probabilities) + +} diff --git a/R/capri.hypotheses.R b/R/capri.hypotheses.R new file mode 100644 index 00000000..ad6f1a4b --- /dev/null +++ b/R/capri.hypotheses.R @@ -0,0 +1,1424 @@ +################################################################################## +# # +# TRONCO: a tool for TRanslational ONCOlogy # +# # +################################################################################## +# Copyright (c) 2015, Marco Antoniotti, Giulio Caravagna, Luca De Sano, # +# Alex Graudenzi, Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, # +# Giancarlo Mauri, Bud Mishra and Daniele Ramazzotti. # +# # +# All rights reserved. This program and the accompanying materials # +# are made available under the terms of the GNU GPL v3.0 # +# which accompanies this distribution # +# # +################################################################################## + + +# Add a new hypothesis by creating a new event and adding it to the compliant genotypes +#' @title hypothesis add +#' @param data A TRONCO compliant dataset. +#' @param pattern.label Label of the new hypothesis. +#' @param lifted.pattern Vector to be added to the lifted genotype resolving the pattern related to the new hypothesis +#' @param pattern.effect Possibile effects for the pattern. +#' @param pattern.cause Possibile causes for the pattern. +#' @return A TRONCO compliant object with the added hypothesis +#' @export hypothesis.add + +hypothesis.add = function(data, + pattern.label, + lifted.pattern, + pattern.effect = "*", + pattern.cause = "*" ) + +{ + + # save the needed data structures + if(!is.null(data$genotypes) && !is.null(data$annotations)) { + genotypes = data$genotypes; + annotations = data$annotations; + } else { + genotypes = NULL; + annotations = NULL; + } + + if(!is.null(data$hypotheses)) { + hypotheses = data$hypotheses; + } else { + hypotheses = NA; + } + + # add the hypothesis only if all the inputs are correctly provided + if(!is.null(genotypes) && !is.null(annotations)) { + + # the Boolean functions look for a set of global variables + # if there are already global variables named as the ones used here, make the backup of them + do.roll.back.lifting.genotypes = FALSE; + do.roll.back.lifting.annotations = FALSE; + do.roll.back.lifting.edges = FALSE; + do.roll.back.fisher.pvalues = FALSE; + + # I need a global variable to save the genotypes of the lifted pattern + # if there is already a global variable named lifting.genotypes, make the backup of it + if(exists("lifting.genotypes")) { + roll.back.lifting.genotypes = lifting.genotypes; + do.roll.back.lifting.genotypes = TRUE; + } + assign("lifting.genotypes",genotypes,envir=.GlobalEnv); + + # I need a global variable to save the annotations of the lifted pattern + # if there is already a global variable named lifting.annotations, make the backup of it + if(exists("lifting.annotations")) { + roll.back.lifting.annotations = lifting.annotations; + do.roll.back.lifting.annotations = TRUE; + } + assign("lifting.annotations",annotations,envir=.GlobalEnv); + + # I need a global variable to save the edges of the lifted pattern + # if there is already a global variable named lifting.edges, make the backup of it + if(exists("lifting.edges")) { + roll.back.lifting.edges = lifting.edges; + do.roll.back.lifting.edges = TRUE; + } + assign("lifting.edges",NULL,envir=.GlobalEnv); + + # I need a global variable to save the pvalues of the lifted pattern + # if there is already a global variable named fisher.pvalues, make the backup of it + if(exists("fisher.pvalues")) { + roll.back.fisher.pvalues = fisher.pvalues; + do.roll.back.fisher.pvalues = TRUE; + } + assign("fisher.pvalues",NULL,envir=.GlobalEnv); + + # save the lifted genotypes and its hypotheses for the current pattern + curr_pattern = lifted.pattern$pattern; + curr_hypotheses = lifted.pattern$hypotheses; + curr_pvalues = fisher.pvalues; + + # save the edges of the lifted pattern + hstructure = lifting.edges; + + # roll back to the previous value of the global variable lifting.genotypes if any or remove it + if(do.roll.back.lifting.genotypes) { + assign("lifting.genotypes",roll.back.lifting.genotypes,envir=.GlobalEnv); + } else { + rm(lifting.genotypes,pos=".GlobalEnv"); + } + + # roll back to the previous value of the global variable lifting.annotations if any or remove it + if(do.roll.back.lifting.annotations) { + assign("lifting.annotations",roll.back.lifting.annotations,envir=.GlobalEnv); + } else { + rm(lifting.annotations,pos=".GlobalEnv"); + } + + # roll back to the previous value of the global variable lifting.edges if any or remove it + if(do.roll.back.lifting.edges) { + assign("lifting.edges",roll.back.lifting.edges,envir=.GlobalEnv); + } else { + rm(lifting.edges,pos=".GlobalEnv"); + } + + # roll back to the previous value of the global variable fisher.pvalues if any or remove it + if(do.roll.back.fisher.pvalues) { + assign("fisher.pvalues",roll.back.fisher.pvalues,envir=.GlobalEnv); + } else { + rm(fisher.pvalues,pos=".GlobalEnv"); + } + + # set the hypotheses number + if(!is.na(hypotheses[1])) { + num.hypotheses = hypotheses$num.hypotheses; + } else { + num.hypotheses = 0; + } + + # * is a special pattern.effect which indicates to use all the events as effects for this pattern + is.to.all.effects = FALSE; + if(pattern.effect[[1]][1]=="*") { + + pattern.effect = colnames(genotypes)[1:(length(colnames(genotypes))-num.hypotheses)]; + + # any event can not be both causes and effects for the pattern to be well-formed + pattern.effect = list(pattern.effect[-which((pattern.effect%in%unlist(curr_hypotheses$llist)))]); + is.to.all.effects = TRUE; + + if(length(pattern.effect)==0) { + stop(paste("[ERR] Missing list of effects to test or wildcard \'*\'.", sep='')); + } + + } + + # check the pattern to be well-formed + all.col.nums = vector(); + if(length(pattern.effect)==0) { + stop(paste("[ERR] Missing list of effects or wildcard \'*\'.", sep='')); + } else { + # check the effects of the pattern to be well-formed + for (i in 1:length(pattern.effect)) { + + curr.pattern.effect = pattern.effect[[i]]; + if(is.to.all.effects==FALSE) { + col.num = -1; + if(length(curr.pattern.effect)==1) { + event.map = emap(c(curr.pattern.effect,"*"),genotypes,annotations); + col.num = event.map$col.num; + events.name = event.map$events.name; + } else if(length(curr.pattern.effect)==2) { + event.map = emap(curr.pattern.effect,genotypes,annotations); + col.num = event.map$col.num; + events.name = event.map$events.name; + } + } + else { + col.num = which(colnames(genotypes)%in%curr.pattern.effect); + if(length(col.num)==0) { + col.num = -1; + } + events.name = curr.pattern.effect; + } + + # check the effect to be a valid event + if(col.num[1]==-1) { + stop(paste("[ERR] Unknown gene among effects: \"", curr.pattern.effect, "\".",sep='')); + } + all.col.nums = append(all.col.nums,col.num); + + # check the pattern to be well-formed + # if the effect is in the pattern, the pattern is not well-formed + if(length(which(unlist(curr_hypotheses$llist)%in%events.name))>0) { + stop(paste("[ERR] Bad formed pattern, event \"", curr.pattern.effect, "\" yields a loop.",sep='')); + } + + } + } + + # look for duplicated effects in the pattern + if(anyDuplicated(all.col.nums)>0) { + stop(paste("[ERR] Bad formed pattern, duplicated events ", + paste(pattern.effect[duplicated(pattern.effect)], collapse=', ', sep=''), + "within effects.", sep='')); + } + + # check that the we are not duplicating any name by adding the new pattern + if(length(which(colnames(genotypes)==pattern.label))>0) { + stop(paste("[ERR] This pattern already exists.", sep='')); + } + + # add the pattern to the genotypes + genotypes = cbind(genotypes,curr_pattern); + + # check that the pattern is valid according to Suppes' theory + # compute the observed and observed joint probabilities + pair.count <- array(0, dim=c(ncol(genotypes),ncol(genotypes))); + # compute the probabilities on the genotypes + for(i in 1:ncol(genotypes)) { + for(j in 1:ncol(genotypes)) { + val1 = genotypes[ ,i]; + val2 = genotypes[ ,j]; + pair.count[i,j] = (t(val1)%*%val2); + } + } + # marginal.probs is an array of the observed marginal probabilities + marginal.probs <- array(as.matrix(diag(pair.count)/nrow(genotypes)),dim=c(ncol(genotypes),1)); + # joint.probs is an array of the observed joint probabilities + joint.probs <- as.matrix(pair.count/nrow(genotypes)); + # check that the probability of the pattern is in (0,1) + if(marginal.probs[ncol(genotypes)]==0 || marginal.probs[ncol(genotypes)]==1) { + stop(paste("[ERR] The pattern has marginal probability ", marginal.probs[ncol(genotypes)], + ", which should be in (0,1).", sep='')); + } + + # check that the pattern does not duplicate any existing column + i = ncol(genotypes); + for(j in 1:ncol(genotypes)) { + # if the edge is valid, i.e., not self cause + if(i!=j) { + #if the two considered events are not distinguishable + if((joint.probs[i,j]/marginal.probs[i])==1 && (joint.probs[i,j]/marginal.probs[j])==1) { + stop(paste("[ERR] Pattern duplicates ", paste(as.events(data)[j, ], collapse=' ', sep=''), + ".", sep='')); + } + } + } + + # * is a special pattern.cause which indicates to use all the events as causes for this pattern + is.to.all.causes = FALSE; + if(pattern.cause[[1]][1]=="*") { + + pattern.cause = colnames(genotypes)[1:(length(colnames(genotypes))-num.hypotheses-1)]; + + # any event can not be both causes and effects for the pattern to be well-formed + pattern.cause = list(pattern.cause[-which((pattern.cause%in%unlist(curr_hypotheses$llist)))]); + is.to.all.causes = TRUE; + + } + + # check the pattern to be well-formed + all.col.nums = vector(); + if(length(pattern.cause)>0) { + + # check the causes of the pattern to be well-formed + for (i in 1:length(pattern.cause)) { + + curr.pattern.cause = pattern.cause[[i]]; + if(is.to.all.causes==FALSE) { + col.num = -1; + if(length(curr.pattern.cause)==1) { + event.map = emap(c(curr.pattern.cause,"*"),genotypes,annotations); + col.num = event.map$col.num; + events.name = event.map$events.name; + } + else if(length(curr.pattern.cause)==2) { + event.map = emap(curr.pattern.cause,genotypes,annotations); + col.num = event.map$col.num; + events.name = event.map$events.name; + } + } + else { + col.num = which(colnames(genotypes)%in%curr.pattern.cause); + if(length(col.num)==0) { + col.num = -1; + } + events.name = curr.pattern.cause; + } + + # check the cause to be a valid event + if(col.num[1]==-1) { + stop(paste("[ERR] Unknown gene among causes: \"", curr.pattern.cause, "\".",sep='')); + } + all.col.nums = append(all.col.nums,col.num); + + # check the pattern to be well-formed + # if the cause is in the pattern, the pattern is not well-formed + if(length(which(unlist(curr_hypotheses$llist)%in%events.name))>0) { + stop(paste("[ERR] Bad formed pattern, event \"", curr.pattern.cause, "\" yields a loop.",sep='')); + } + + } + } + + # look for duplicated causes in the pattern + if(anyDuplicated(all.col.nums)>0) { + stop(paste("[ERR] Bad formed pattern, duplicated events ", + paste(pattern.cause[duplicated(pattern.cause)], collapse=', ', sep=''), + "within causes.", sep='')); + } + + # now I can finally add the hypothesis + colnames(genotypes)[ncol(genotypes)] = pattern.label; + if(is.na(hypotheses[1])) { + hypotheses = list(); + } + hypotheses$num.hypotheses = num.hypotheses + 1; + + # add the new hypothesis in the annotations + annotations = rbind(data$annotations,c("Pattern", pattern.label)); + rownames(annotations)[nrow(annotations)] = pattern.label; + + # add the color of the type "Hypothesis" is not already defined + if(any(rownames(data$types)=="Pattern")==FALSE) { + types = rbind(data$types, 'slateblue'); + rownames(types)[nrow(types)] = "Pattern"; + data$types = types; + } + + # create the list of added hypotheses + if(length(hypotheses$hlist)==0) { + hypotheses$hlist = vector(); + } + # add the new hypothesis to the list + for (i in 1:length(pattern.effect)) { + curr.pattern.effect = pattern.effect[[i]]; + if(is.to.all.effects==FALSE) { + if(length(curr.pattern.effect)==1) { + event.map = emap(c(curr.pattern.effect,"*"),genotypes,annotations); + col.num = event.map$col.num; + } + else if(length(curr.pattern.effect)==2) { + event.map = emap(curr.pattern.effect,genotypes,annotations); + col.num = event.map$col.num; + } + } + else { + col.num = which(colnames(genotypes)%in%curr.pattern.effect); + if(length(col.num)==0) { + col.num = -1; + } + } + for (j in 1:length(col.num)) { + hypotheses$hlist = rbind(hypotheses$hlist,t(c(colnames(genotypes)[ncol(genotypes)],colnames(genotypes)[col.num[j]]))); + } + if(is.null(colnames(hypotheses$hlist))) { + colnames(hypotheses$hlist) = c("cause","effect"); + } + } + + # add the causes of the new hypothesis to the list + if(length(pattern.cause)>0) { + for (i in 1:length(pattern.cause)) { + curr.pattern.cause = pattern.cause[[i]]; + if(is.to.all.causes==FALSE) { + if(length(curr.pattern.cause)==1) { + event.map = emap(c(curr.pattern.cause,"*"),genotypes,annotations); + col.num = event.map$col.num; + } + else if(length(curr.pattern.cause)==2) { + event.map = emap(curr.pattern.cause,genotypes,annotations); + col.num = event.map$col.num; + } + } + else { + col.num = which(colnames(genotypes)%in%curr.pattern.cause); + if(length(col.num)==0) { + col.num = -1; + } + } + for (j in 1:length(col.num)) { + hypotheses$hlist = rbind(hypotheses$hlist,t(c(colnames(genotypes)[col.num[j]],colnames(genotypes)[ncol(genotypes)]))); + } + if(is.null(colnames(hypotheses$hlist))) { + colnames(hypotheses$hlist) = c("cause","effect"); + } + } + } + + # create the list of hypotheses' structures + if(length(hypotheses$hstructure)==0) { + hypotheses$hstructure = new.env(hash=TRUE,parent=emptyenv()); + } + hypotheses$hstructure[[pattern.label]] = get.lifted.pattern(hstructure); + + # add the atoms of the hypothesis + if(length(hypotheses$patterns)==0) { + hypotheses$patterns = list(); + } + hypotheses$patterns[pattern.label] = lifted.pattern$hypotheses$llist; + + #add the hypotheses of the atoms + if(length(hypotheses$atoms)==0) { + hypotheses$atoms = vector(mode="list",length=(ncol(genotypes)-hypotheses$num.hypotheses)); + names(hypotheses$atoms) = colnames(genotypes)[1:(ncol(genotypes)-hypotheses$num.hypotheses)]; + } + atoms.in.pattern = which(names(hypotheses$atoms)%in%unlist(hypotheses$patterns[pattern.label])); + if(length(atoms.in.pattern)>0) { + for (i in 1:length(atoms.in.pattern)) { + hypotheses$atoms[[atoms.in.pattern[i]]] = append(hypotheses$atoms[[atoms.in.pattern[i]]], pattern.label); + } + } + + #add the fisher pvalues + if(length(hypotheses$pvalues)==0) { + hypotheses$pvalues = vector(); + } + hypotheses$pvalues = append(hypotheses$pvalues,list(curr_pvalues)) + names(hypotheses$pvalues)[length(hypotheses$pvalues)] = pattern.label; + + #return the new (compliant) data structure as result + data$genotypes = genotypes; + data$annotations = annotations; + data$hypotheses = hypotheses; + + return(data); + } else { + stop("[ERR] Missing genotypes or pattern."); + } + + return(NA); + +} + + +# resolve the ellipsis for the effects +hypothesis.lifted.effects = function( ... ) { + return(list(...)); +} + + +#' Add all the hypotheses related to a group of events +#' @title hypothesis add group +#' @param x A TRONCO compliant dataset. +#' @param FUN Type of pattern to be added, e.g., co-occurance, soft or hard exclusivity. +#' @param group Group of events to be considered. +#' @param pattern.cause Possibile causes for the pattern. +#' @param pattern.effect Possibile effects for the pattern. +#' @param dim.min Minimum cardinality of the subgroups to be considered. +#' @param dim.max Maximum cardinality of the subgroups to be considered. +#' @param min.prob Minimum probability associated to each valid group. +#' @return A TRONCO compliant object with the added hypotheses +#' @export hypothesis.add.group + +hypothesis.add.group = function(x, + FUN, + group, + pattern.cause = '*', + pattern.effect = '*', + dim.min = 2, + dim.max = length(group), + min.prob = 0) +{ + op = deparse(substitute(FUN)) + + effect = paste0("c('", paste(pattern.effect, collapse = "', '"), "')") + cause = paste0("c('", paste(pattern.cause, collapse = "', '"), "')") + + ngroup = length(group) + if (ngroup < 2) { + warning("No hypothesis will be created for groups with less than 2 elements.") + return(x) + } + + cat("*** Adding Group Hypotheses\n") + cat(' Group:', paste(group, collapse = ", ", sep = ""), '\n') + cat(' Function:', op, '\n') + cat(' Cause:', paste(pattern.cause, collapse=", "), '; ') + cat(' Effect:', paste(pattern.effect, collapse=", "), '.\n') + flush.console() + + if(min.prob > 0) + { + cat('\nFiltering genes within the group with alteration frequency below', min.prob, '\n') + + temp = events.selection(x, filter.in.names = group) + temp = as.alterations(temp) + temp = events.selection(temp, filter.freq = min.prob) + + group = as.genes(temp) + cat('New group:', paste(group, collapse = ", ", sep = ""), '\n') + } + + ngroup = length(group) + if (ngroup < 2) { + warning("No hypothesis will be created for groups with less than 2 elements.") + return(x) + } + + hom.group = lapply(group, function(g, x) { + if (nevents(x, genes = g) > 1) { + T + } else { + F + } + }, x) + hom.group = group[unlist(hom.group)] + + gene.hom = function(g, h) { + if (g %in% h) + { + if( any(rowSums(as.gene(x, genes = g)) > 1) ) return(paste0("OR('", g, "')")) + else return(paste0("XOR('", g, "')")) + } + return(paste0("'", g, "'")) + } + + max.groupsize = min(dim.max, ngroup) + min.groupsize = max(2, dim.min) + + if(dim.min > dim.max) { + stop('ERROR - dim.min > dim.max') + } + + if(min.groupsize > max.groupsize) { + stop('ERROR - min.groupsize > max.groupsize') + } + + if (length(hom.group) > 0) + cat("Genes with multiple events: ", paste(unlist(hom.group), collapse=', ', sep=''), "\n") + + error.summary = data.frame() + + # Get an analytical pattern... ! + tot.patterns = 0 + for (i in min.groupsize:max.groupsize) { + tot.patterns = tot.patterns + ncol(combn(unlist(group), i)) + } + + # create a progress bar + cat('Generating ', tot.patterns ,'patterns [size: min =', max.groupsize,' - max =', max.groupsize, '].\n') + + # pb <- txtProgressBar(0, tot.patterns, style = 3) + if(!exists('hide.progress.bar') || !hide.progress.bar) { + flush.console() + } + + pbPos = 0 + for (i in min.groupsize:max.groupsize) { + gr = combn(unlist(group), i) + + for (j in 1:ncol(gr)) { + genes = as.list(gr[, j]) + + #start the progress bar + pbPos = pbPos + 1 + + hypo.name = paste(unlist(genes), sep = "_", collapse = "_") + hypo.genes = paste(lapply(genes, function(g, hom.group) { + gene.hom(g, hom.group) + }, hom.group), collapse = ", ") + + hypo.add = paste0("hypothesis.add(x, ", + "pattern.label= '", op, "_", hypo.name, "', ", + "lifted.pattern= ", op, "(", hypo.genes, "), ", + "pattern.effect=", effect, ", ", + "pattern.cause=", cause, ")") + + err = tryCatch({ + x = eval(parse(text = hypo.add)) + }, error = function(cond) { + + m = paste("Error on", hypo.add, ".\n", cond) + code = strsplit(as.character(cond), " ")[[1]] + idx.errcode = which(code == "[ERR]", arr.ind = TRUE) + 1 + + return( + data.frame( + pattern = paste(unlist(genes), collapse = ", ", sep = ""), + error = paste(code[idx.errcode:length(code)], collapse = " ") + )) + + }, warning = function(cond) { + m = paste("Warning on", hypo.add, ".\n", cond) + return(genes) + }) + # Dummy errors detection + if (!("genotypes" %in% names(err))) + error.summary = rbind(error.summary, err) + } + } + + + if (nrow(error.summary) > 0) { + cat(paste(nrow(error.summary), " genes pattern could not be added -- showing errors\n", sep = "")) + print(error.summary) + } else cat("Hypothesis created for all possible patterns.\n") + + return(x) +} + +#' Add all the hypotheses related to homologou events +#' @title hypothesis.add.homologous +#' @param x A TRONCO compliant dataset. +#' @param pattern.cause Possibile causes for the pattern. +#' @param pattern.effect Possibile effects for the pattern. +#' @param genes List of genes to be considered as possible homologous. For these genes, all the types of mutations will be considered functionally equivalent. +#' @param FUN Type of pattern to be added, e.g., co-occurance, soft or hard exclusivity. +#' @return A TRONCO compliant object with the added hypotheses +#' @export hypothesis.add.homologous +hypothesis.add.homologous = function(x, + pattern.cause = '*', + pattern.effect = '*', + genes = as.genes(x), + FUN = OR) +{ + + op = deparse(substitute(FUN)) + + hom.group = lapply(genes, function(g, x) { + if (nevents(x, genes = g) > 1) + T + else F + }, x) + + hom.group = genes[unlist(hom.group)] + + if (length(hom.group) == 0) { + warning("No genes with multiple events.") + return(x) + } + + cat("*** Adding hypotheses for Homologous Patterns\n") + cat(' Genes:', paste(hom.group, collapse = ", ", sep = ""), '\n') + cat(' Function:', op, '\n') + cat(' Cause:', paste(pattern.cause, collapse=", "), '\n') + cat(' Effect:', paste(pattern.effect, collapse=", "), '\n') + flush.console() + + effect = paste0("c('", paste(pattern.effect, collapse = "', '"), "')") + cause = paste0("c('", paste(pattern.cause, collapse = "', '"), "')") + + if (length(hom.group) == 0) { + warning("No genes with multiple events.") + return(x) + } + + # create a progress bar + if(!exists('hide.progress.bar') || !hide.progress.bar) { + pb <- txtProgressBar(0, length(hom.group), style = 3) + } + + + error.summary = data.frame() + + for (i in 1:length(hom.group)) { + + #start the progress bar + if(!exists('hide.progress.bar') || !hide.progress.bar) { + setTxtProgressBar(pb, i) + } + # Check if the joint probability of homologous events is > 0, if + # yes the event will be added as 'OR', otherwise 'XOR' + if( any(rowSums(as.gene(x, genes = hom.group[[i]])) > 1)) { + FUN = 'OR' + } else { + FUN = 'XOR' + } + + hypo.add = paste0("hypothesis.add(x, ", + "pattern.label= '", FUN, "_", hom.group[[i]], "', ", + "lifted.pattern= ", FUN, "('", hom.group[[i]], "'), ", + "pattern.cause= ", cause, ", ", + "pattern.effect=", effect, ")") + + err = tryCatch({ + x = eval(parse(text = hypo.add)) + }, error = function(cond) { + m = paste("Error on", hypo.add, ".\n", cond) + code = strsplit(as.character(cond), " ")[[1]] + idx.errcode = which(code == "[ERR]", arr.ind = TRUE) + 1 + + return(data.frame(pattern = paste(unlist(hom.group[[i]]), collapse = ", ", sep = ""), error = paste(code[idx.errcode:length(code)], collapse = " "))) + + }, warning = function(cond) { + m = paste("Warning on", hypo.add, ".\n", cond) + return(genes) + }) + # Dummy errors detection + if (!("genotypes" %in% names(err))) + error.summary = rbind(error.summary, err) + } + + # close progress bar + if(!exists('hide.progress.bar') || !hide.progress.bar) { + close(pb) + } + + if (nrow(error.summary) > 0) { + cat(paste(nrow(error.summary), " patterns could not be added -- showing errors\n", sep = "")) + print(error.summary) + } else cat("Hypothesis created for all possible gene patterns.\n") + + return(x) +} + +#' Internal function for hypotheses expansion +#' @title hypotheses.expansion +#' @param input_matrix A TRONCO adjacency matrix +#' @param map hypothesis name - hypothesis adjacency matrix map +#' @param hidden_and Should I visualize hidden and? +#' @param expand Should I expand the hypotheses? +#' @param skip.disconnected Hide disconnected node +#' @import igraph +hypotheses.expansion <- function(input_matrix, + map = list(), + hidden_and = T, + expand = T, + skip.disconnected = TRUE + ) + +{ + # get node list + node_list <- colnames(input_matrix) + + # cut input matrix + num_hypos = 0 + if(length(map) > 0) { + num_hypos = Reduce(sum, lapply(ls(map), function(x, y){if(x %in% y)return(1)}, y=node_list)) + } + + margin = length(node_list) - num_hypos + hypos_new_name = list() + + # check if there are hypotheses + if (num_hypos == 0 || !expand) { + # if no hypos do nothings.. + min_matrix = input_matrix + } else { + # ..else expand them + min_matrix = input_matrix[-(margin+1):-length(node_list), -(margin+1):-length(node_list)] + + # create graph from matrix + min_graph = graph.adjacency(min_matrix) + + for (h in ls(map)) { + + if (! h %in% node_list) { + next + } + + hypo = map[[h]] + + # create graph from hypo + hypo_graph = graph.adjacency(hypo) + + # name of this node + h_mat <- rowSums(get.adjacency(hypo_graph, sparse=FALSE)) + + initial_node <- names(h_mat)[which(h_mat==0)] + hypos_new_name[initial_node] = h + + # change names in confidence matrix according to hypotesis + display.up = FALSE + if (length(which(input_matrix[, h] == 1)) != 0) { + display.up = TRUE + } + + display.down = FALSE + if (length(which(input_matrix[h, ] == 1)) != 0) { + display.down = TRUE + } + + # display up hypo and reconnect + if (display.up) { + + hypo_pre = t(hypo) + + node_names = rownames(hypo_pre) + node_names = lapply(node_names, function(x){ if(is.logic.node(x)) { paste0('UP', x) } else { return(x) } }) + + + + rownames(hypo_pre) = node_names + colnames(hypo_pre) = node_names + + # create graph from hypo + hypo_graph_pre = graph.adjacency(hypo_pre) + + # name of this node + h_mat_pre <- colSums(get.adjacency(hypo_graph_pre, sparse=FALSE)) + + final_node <- names(h_mat_pre)[which(h_mat==0)] + hypos_new_name[final_node] = h + + # edge to reconstruct + h_edge <- input_matrix[, h] + initial_node_up <- names(h_edge)[which(h_edge==1)] + + # add this graph to main graph + min_graph = graph.union(min_graph, hypo_graph_pre) + + # recreate lost edge + for (node in initial_node_up) { + min_graph <- min_graph + edge(node, final_node) + } + + } + + # display down hypo + if (display.down) { + + # edge to reconstruct + h_edge <- input_matrix[h,] + final_node <- names(h_edge)[which(h_edge==1)] + + # add this graph to main graph + min_graph = graph.union(min_graph, hypo_graph) + + # recreate lost edge + for (node in final_node) { + min_graph <- min_graph + edge(initial_node, node) + } + } + + } + min_matrix = get.adjacency(min_graph, sparse = F) + } + + + # now expand the hidden AND + if(hidden_and == F) { + # sort col and row (igraph wants the same order) + min_matrix = min_matrix[,order(colnames(min_matrix))] + min_matrix = min_matrix[order(rownames(min_matrix)),] + return(list(min_matrix, hypos_new_name)) + } + + cat('\n*** Expand hidden and:') + + and_matrix = NULL + to_reconnect = list() + logical_op = list("AND", "OR", "NOT", "XOR", "UPAND", 'UPOR', 'UPXOR') + + # foreach AND column + for (col in colnames(min_matrix)) { + prefix = gsub("_.*$", "", col) + if ( !(prefix %in% logical_op) && sum(min_matrix[,col]) > 1 ) { + # not logical operator and colsum > 1 there is a hidden + # AND and something has to be done.. + + # remember to reconnect the fake and to this node + to_reconnect = append(to_reconnect, col) + + # append a colum from the old matrix.. + and_matrix = cbind(and_matrix, min_matrix[,col]) + pos = ncol(and_matrix) + + # and give her a new name based on the old one + new_col_name = paste("*", col, sep="_") + colnames(and_matrix)[pos] = new_col_name + + # append a 0 columl to the matrix.. + and_matrix = cbind(and_matrix, matrix(0,nrow = nrow(and_matrix), ncol = 1)) + pos = ncol(and_matrix) + + # and give her the old name + colnames(and_matrix)[pos] = col + + # now do the same to conf_matrix + if(!is.null(conf_matrix)) { + conf_matrix = cbind(conf_matrix, conf_matrix[, col]) + pos = ncol(conf_matrix) + colnames(conf_matrix)[pos] = new_col_name + } + + } else { + # ..else add the row taken from the old matrix + # and set the correct colname + and_matrix = cbind(and_matrix, min_matrix[,col]) + pos = ncol(and_matrix) + colnames(and_matrix)[pos] = col + } + } + + # now reconnect AND node to his gene (AND_Gene7 -> Gene7) + for(row in to_reconnect) { + and_matrix = rbind(and_matrix, matrix(0, ncol=ncol(and_matrix), nrow = 1)) + pos = nrow(and_matrix) + rownames(and_matrix)[pos] = paste0("*_", row) + and_matrix[paste0("*_", row),row] = 1 + } + + + # sort col and row (igraph wants the same order) + and_matrix = and_matrix[,order(colnames(and_matrix))] + and_matrix = and_matrix[order(rownames(and_matrix)),] + + if(!is.null(conf_matrix)) { + return(list(and_matrix, hypos_new_name, conf_matrix)) + } + return(list(and_matrix, hypos_new_name)) +} + +# Utility function to add the hypotheses +aux.log = function( genotypes, annotations, function.name, ... ) { + + if(!is.null(genotypes) && !is.null(annotations) && length(list(...)) > 0) { + + clauses = list(...) + curr_genotypes = array(0, c(nrow(genotypes), length(clauses))) + hypotheses = list() + function.inputs = list() + fisher.tests = vector() + + for (i in 1:length(clauses)) { + + # if the clause is given by name, get the column from the genotypes + if(typeof(clauses[[i]]) == "character") { + + col.num = -1 + # if I have the label, get the column in the genotypes for this event + if(length(clauses[[i]]) == 1) { + event.map = emap(c(clauses[[i]],"*"), genotypes, annotations) + col.num = event.map$col.num + } + else if(length(clauses[[i]]) == 2) { + event.map = emap(clauses[[i]], genotypes, annotations) + col.num = event.map$col.num + } + + if(col.num[1] == -1) { + stop(paste("[ERR] No events for gene ", paste(clauses[[i]], collapse=', ', sep=''))) + } + else { + curr_genotypes[,i] = genotypes[,col.num[1]] + if(length(col.num)>1) { + curr_genotypes = cbind(curr_genotypes, genotypes[ ,col.num[2:length(col.num)]]) + } + + if(length(hypotheses$llist) == 0) { + hypotheses$llist = list(event.map$events.name) + } + else { + hypotheses$llist = list(c(unlist(hypotheses$llist), event.map$events.name)) + } + for (j in 1:length(event.map$events.name)) { + function.name = paste(function.name,"_",event.map$events.name[j],sep="") + function.inputs = c(function.inputs,event.map$events.name[j]) + } + } + + } + else { + # otherwise I already have the column as a vector + curr_genotypes[,i] = clauses[[i]]$pattern + # if it is a list + if(length(hypotheses$llist) == 0) { + hypotheses$llist = clauses[[i]]$hypotheses$llist + } + else { + hypotheses$llist = list(c(unlist(clauses[[i]]$hypotheses$llist),unlist(hypotheses$llist))) + } + function.name = paste(function.name, "_", clauses[[i]]$function.name, sep="") + function.inputs = c(function.inputs, clauses[[i]]$function.name) + + } + } + + result = list(curr_genotypes=curr_genotypes, + hypotheses=hypotheses, + function.name=function.name, + function.inputs=function.inputs, + tests = pairwise.fisher.test(curr_genotypes)) + + # save the new edges + for(k in 1:length(result$function.inputs)) { + lifting.edges = rbind(lifting.edges, c(result$function.inputs[[k]], result$function.name)) + assign("lifting.edges", lifting.edges, envir=.GlobalEnv) + } + + return(result) + + } else { + stop("[ERR] Either the genotypes or the pattern not provided! No hypothesis will be created.") + } + return(NA) +} + +#' AND hypothesis +#' @title AND +#' @param ... Atoms of the co-occurance pattern given either as labels or as partielly lifted vectors. +#' @return Vector to be added to the lifted genotype resolving the co-occurance pattern +#' @export AND +AND = function( ... ) { + # look for the global variables named lifting.genotypes and lifting.annotations + genotypes = lifting.genotypes + annotations = lifting.annotations + if(!is.null(genotypes) && !is.null(annotations) && length(list(...))>0) { + # get the vector of the clauses of the pattern from the genotypes + result = aux.log(genotypes,annotations, "AND" ,...) + curr_genotypes = result$curr_genotypes + hypotheses = result$hypotheses + function.name = result$function.name + function.inputs = result$function.inputs + + #save the fisher exact tests + if(length(result$tests)>0) { + for(i in 1:length(result$tests)) { + curr.test = result$tests[[i]] + odds.ratio = curr.test[2] + + if(curr.test[2]<=0) { + curr.pvalue = 1 + } + else { + curr.pvalue = curr.test[1] + } + + fisher.pvalues = append(fisher.pvalues,curr.pvalue) + } + assign("fisher.pvalues", fisher.pvalues, envir=.GlobalEnv) + } + + # evaluate the AND operator + pattern = rep(0,nrow(genotypes)) + for (i in 1:nrow(genotypes)) { + pattern[i] = sum(curr_genotypes[i,]) + if(pattern[i]0) { + # get the vector of the clauses of the pattern from the genotypes + result = aux.log(genotypes,annotations,"OR",...) + curr_genotypes = result$curr_genotypes + hypotheses = result$hypotheses + function.name = result$function.name + function.inputs = result$function.inputs + + # save the fisher exact tests + if(length(result$tests)>0) { + for(i in 1:length(result$tests)) { + curr.test = result$tests[[i]] + curr.p.value = 1 - curr.test[1] + + fisher.pvalues = append(fisher.pvalues,curr.p.value) + } + assign("fisher.pvalues", fisher.pvalues, envir=.GlobalEnv) + } + + # evaluate the OR operator + pattern = rep(0,nrow(genotypes)) + for (i in 1:nrow(genotypes)) { + pattern[i] = sum(curr_genotypes[i,]) + if(pattern[i]>1) { + pattern[i] = 1 + } + } + pattern = as.integer(pattern) + result = list(pattern=pattern, hypotheses=hypotheses, function.name=function.name, function.inputs=function.inputs, fisher.pvalues=fisher.pvalues) + return(result) + } else { + stop("[ERR] Either the genotypes or the pattern not provided! No hypothesis will be created."); + } + return(NA) +} + +#' XOR hypothesis +#' @title XOR +#' @param ... Atoms of the hard exclusive pattern given either as labels or as partielly lifted vectors. +#' @return Vector to be added to the lifted genotype resolving the hard exclusive pattern +#' @export XOR +XOR = function( ... ) { + #look for the global variables named lifting.genotypes and lifting.annotations + genotypes = lifting.genotypes + annotations = lifting.annotations + if(!is.null(genotypes) && !is.null(annotations) && length(list(...))>0) { + #get the vector of the clauses of the pattern from the genotypes + result = aux.log(genotypes,annotations,"XOR",...) + curr_genotypes = result$curr_genotypes + hypotheses = result$hypotheses + function.name = result$function.name + function.inputs = result$function.inputs + + # save the fisher exact tests + if(length(result$tests)>0) { + for(i in 1:length(result$tests)) { + curr.test = result$tests[[i]] + odds.ratio = curr.test[2] + + if(curr.test[2]>=0) { + curr.pvalue = 1 + } + else { + curr.pvalue = curr.test[1] + } + + fisher.pvalues = append(fisher.pvalues,curr.pvalue) + } + assign("fisher.pvalues", fisher.pvalues, envir=.GlobalEnv) + } + + # evaluate the XOR operator + pattern = rep(0,nrow(genotypes)) + for (i in 1:nrow(genotypes)) { + pattern[i] = sum(curr_genotypes[i,]) + if(pattern[i]>1) { + pattern[i] = 0 + } + } + pattern = as.integer(pattern) + result = list(pattern=pattern, hypotheses=hypotheses, function.name=function.name, function.inputs=function.inputs, fisher.pvalues=fisher.pvalues) + return(result) + } else { + stop("[ERR] Either the genotypes or the pattern not provided! No hypothesis will be created."); + } + return(NA); +} + +# Return the position in the genotypes of the event referring to the given label.event +emap = function( label.event, genotypes, annotations ) { + col.num = -1; + events.name = ""; + if(!is.null(genotypes) && !is.null(annotations)) { + if(label.event[2]!="*") { + curr.events = which(annotations[,"event"]==label.event[1]&annotations[,"type"]==label.event[2]); + } + else { + curr.events = which(annotations[,"event"]==label.event[1]); + } + if(length(curr.events)>0) { + events.name = names(curr.events); + col.num = which(colnames(genotypes)%in%events.name); + } + } + else { + stop("[ERR] A genotypes must be available in order to define any hypothesis!",call.=FALSE); + } + results = list(col.num=col.num,events.name=events.name); + return(results); +} + + +# Return the adjacency matrix of the pattern given the list of edges involving it +get.lifted.pattern = function( lifted.edges ) { + #structure to save the adjacency matrix + lifted.adj.matrix = array(0,c(length(unique(c(lifted.edges[,1],lifted.edges[,2]))),length(unique(c(lifted.edges[,1],lifted.edges[,2]))))); + rownames(lifted.adj.matrix) = unique(c(lifted.edges[,1],lifted.edges[,2])); + colnames(lifted.adj.matrix) = rownames(lifted.adj.matrix); + #build the matrix given the lifted.edges + for(i in 1:nrow(lifted.edges)) { + lifted.adj.matrix[lifted.edges[i,1],lifted.edges[i,2]] = 1; + } + return(lifted.adj.matrix); +} + +#' evaluate cycles involving any hypothesis +#' @title hypothesis.evaluate.cycles +#' @param data input genotypes and its hypotheses +#' @param adj.matrix adjacency matrix of the reconstructed topology +#' @param hypotheses.labels label of all the existing hypotheses +#' @param weights.matrix weights of any edge in the topology +hypothesis.evaluate.cycles = function(data, + adj.matrix, + hypotheses.labels, + weights.matrix) + +{ + + # create the structures where to save the weights in increasing order of confidence + ordered.weights <- vector(); + ordered.edges <- list(); + + # create a map structure where to save the atomic events of each hypothesis + matomic = new.env(hash=TRUE,parent=emptyenv()); + + # create a map structure where to save the hypotheses of each atomic event + mhypotheses = new.env(hash=TRUE,parent=emptyenv()); + + # evaluate all the existing hypotheses + for (i in 1:length(hypotheses.labels)) { + + #evaluate the current hypothesis + connections = hypothesis.connections(adj.matrix, hypotheses.labels[i]); + connections = hypothesis.expand.connections(label=hypotheses.labels[i],events=pattern.events(data,hypotheses.labels[i]),incoming=connections$incoming,outgoing=connections$outgoing,hnames=colnames(adj.matrix),matomic=matomic,weights.matrix=weights.matrix); + + #save the results for the current hypothesis + ordered.weights = c(ordered.weights,connections$ordered.weights); + ordered.edges = c(ordered.edges,connections$ordered.edges); + matomic = connections$matomic; + + } + + #add to the map the link between atomic to pattern + for (i in 1:ncol(adj.matrix)) { + if(!is.null(data$atoms[[colnames(adj.matrix)[i]]])) { + #add to the map the hypotheses of this atomic event + mhypotheses[[toString(i)]] = which(colnames(adj.matrix)%in%data$atoms[[colnames(adj.matrix)[i]]]); + } + } + + #return the results + return(list(ordered.weights=ordered.weights,ordered.edges=ordered.edges,matomic=matomic,mhypotheses=mhypotheses)); +} + +#' given the adj.matrix, return the incoming and outgoing connections for any hypothesis +#' @title hypothesis.connections +#' @param adj.matrix adjacency matrix of the topology +#' @param hypotheses.label label of the hypothesis +hypothesis.connections = function( adj.matrix, hypotheses.label ) { + + hypotheses.label = hypotheses.label[hypotheses.label %in% rownames(adj.matrix)] + + incoming = rownames(adj.matrix)[which(adj.matrix[,hypotheses.label]==1)]; + outgoing = colnames(adj.matrix)[which(adj.matrix[hypotheses.label,]==1)]; + connections = list(incoming=incoming,outgoing=outgoing); + + return(connections); + +} + +#' expand and enumerate all the connections incoming or outgoing an hypothesis +#' @title hypothesis.expand.connections +#' @param label name of the hypothesis +#' @param events events in the hypothesis +#' @param incoming incoming connections +#' @param outgoing outgoing connections +#' @param hnames todo +#' @param matomic todo +#' @param weights.matrix weights of any edge in the topology +hypothesis.expand.connections = function(label, + events, + incoming, + outgoing, + hnames, + matomic, + weights.matrix) + +{ + + # create the structures where to save the weights in increasing order of confidence + ordered.weights <- vector() + ordered.edges <- list() + + # get the position of the hypothesis + hypothesis.pos = which(hnames==label) + + # evalutate the incoming and outgoing connections + curr.edge.pos = 0; + if(length(incoming)>0) { + for(i in 1:length(incoming)) { + ordered.weights = rbind(ordered.weights,weights.matrix[which(hnames==incoming[i]),hypothesis.pos]); + curr.edge.pos = curr.edge.pos + 1 + new.edge <- array(0, c(2,1)) + new.edge[1,1] = which(hnames==incoming[i]) + new.edge[2,1] = hypothesis.pos + ordered.edges[curr.edge.pos] = list(new.edge) + } + } + if(length(outgoing)>0) { + for(i in 1:length(outgoing)) { + ordered.weights = rbind(ordered.weights,weights.matrix[hypothesis.pos,which(hnames==outgoing[i])]); + curr.edge.pos = curr.edge.pos + 1 + new.edge <- array(0, c(2,1)) + new.edge[1,1] = hypothesis.pos + new.edge[2,1] = which(hnames==outgoing[i]) + ordered.edges[curr.edge.pos] = list(new.edge) + } + } + + if(length(hypothesis.pos) > 0) { + matomic[[toString(hypothesis.pos)]] = which(hnames%in%events) #add to the map the atomic events of this hypothesis + } else { + print('hypothesis.pos == 0!') + } + + # return the results + return(list(ordered.weights=ordered.weights,ordered.edges=ordered.edges,matomic=matomic)); + +} + +# given the hypotheses and the adj.matrix, return the updated adj.matrix +hypothesis.adj.matrix = function(hypotheses, adj.matrix) { + + if(!is.na(hypotheses[1])) { + + # set the invalid entries in the adj.matrix + # hypotheses can not be causing other hypotheses + adj.matrix[(ncol(adj.matrix)-hypotheses$num.hypotheses+1):ncol(adj.matrix),(ncol(adj.matrix)-hypotheses$num.hypotheses+1):ncol(adj.matrix)] = 0; + + # consider the given hypotheses only against the specified possible effects + adj.matrix[(ncol(adj.matrix)-hypotheses$num.hypotheses+1):ncol(adj.matrix),1:(ncol(adj.matrix)-hypotheses$num.hypotheses)] = 0 + adj.matrix[1:(ncol(adj.matrix)-hypotheses$num.hypotheses),(ncol(adj.matrix)-hypotheses$num.hypotheses+1):ncol(adj.matrix)] = 0 + + # set the elements from the hlist + for (i in 1:nrow(hypotheses$hlist)) { + cause = which(colnames(adj.matrix)%in%hypotheses$hlist[i,"cause"]); + effect = which(colnames(adj.matrix)%in%hypotheses$hlist[i,"effect"]); + if(length(cause)>0 && length(effect)>0) { + adj.matrix[cause,effect] = 1; + } + } + } + return(adj.matrix); + +} + +# internal testing function +testing = function(data, g1, g2) { + + # Dataframe di tutto il genotypes + df = data.frame(row.names=as.samples(data)) + df$x = rowSums(as.gene(data, genes=g1)) + df$y = rowSums(as.gene(data, genes=g2)) + + # Lifting xor + df$xor = df$x + df$y + df$xor[ df$xor > 1] = 0 + + # Lifting or + df$or = df$x + df$y + df$or[ df$or > 1] = 1 + + # Lifting and + df$and = df$x + df$y + df$and[ df$and < 2] = 0 + df$and[ df$and == 2] = 1 + + # Nomi per accedere successivamente + names(df$x) = g1 + names(df$y) = g2 + names(df$xor) = 'xor' + names(df$or) = 'or' + names(df$and) = 'and' + + cat('genotypes\n') + print(df) + + # Tabella di contingenza 2x2 + table.xor = rbind( + c(nrow(df) - sum(df$or), sum(df$or - df$y)), + c(sum(df$or - df$x), sum(df$and)) + ) + + colnames(table.xor) = c(paste0('-', g1), paste0('+', g1)) + rownames(table.xor) = c(paste0('-', g2), paste0('+', g2)) + + cat('\nCATEGORICAL\n') + print(table.xor) + + # Fisher 2-sided + test = fisher.test(table.xor) + + # p-value e log dell’odds ratio + cat('p-value (2-sided): ', test$p.value, '\n') + cat('log(odds ratio): ', log(test$estimate['odds ratio'])) +} + +# performs pairwise exact fisher test +pairwise.fisher.test = function(data) { + + # structure to save the results + results = vector(); + + if(ncol(data)>1) { + for(i in 1:ncol(data)) { + for(j in i:ncol(data)) { + if(i!=j) { + + df = data[,c(i,j)] + df_x = data[,1] + df_y = data[,2] + + # Lifting xor + df_xor = df_x + df_y + df_xor[ df_xor > 1] = 0 + + # Lifting or + df_or = df_x + df_y + df_or[ df_or > 1] = 1 + + # Lifting and + df_and = df_x + df_y + df_and[ df_and < 2] = 0 + df_and[ df_and == 2] = 1 + + # 2x2 contingency table + table.xor = rbind( + c(nrow(df) - sum(df_or), sum(df_or - df_y)), + c(sum(df_or - df_x), sum(df_and)) + ) + + # Fisher 2-sided + test = fisher.test(table.xor) + + # save the results + curr_result = c(test$p.value,log(test$estimate['odds ratio'])) + results = append(results, list(curr_result)) + + } + } + } + } + + return(results) + +} diff --git a/R/check.dataset.R b/R/check.dataset.R deleted file mode 100644 index 14174af1..00000000 --- a/R/check.dataset.R +++ /dev/null @@ -1,127 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -#check if the dataset is valid accordingly to the probability raising -#INPUT: -#dataset: a dataset describing a progressive phenomenon -#verbose: should I print the warnings? Yes if TRUE, no otherwise -#RETURN: -#valid.dataset: a dataset valid accordingly to the probability raising -"check.dataset" <- -function(dataset,verbose) { - #perform the preprocessing only if I have at least two binary events and two samples - if(length(ncol(dataset))>0 && ncol(dataset)>1 && length(nrow(dataset))>0 && nrow(dataset)>1 && length(dataset[dataset==0|dataset==1])==nrow(dataset)*ncol(dataset)) { - #structure to compute the observed and observed joint probabilities - pair.count <- array(0, dim=c(ncol(dataset), ncol(dataset))); - #compute the probabilities on the dataset - for(i in 1:ncol(dataset)) { - for(j in 1:ncol(dataset)) { - val1 = dataset[ ,i]; - val2 = dataset[ ,j]; - pair.count[i,j] = (t(val1) %*% val2); - } - } - #marginal.probs is an array of the observed marginal probabilities - marginal.probs <- array(as.matrix(diag(pair.count)/nrow(dataset)),dim=c(ncol(dataset),1)); - #joint.probs is an array of the observed joint probabilities - joint.probs <- as.matrix(pair.count/nrow(dataset)); - #remove the events with marginal observed probability not strictly in (0,1), if any - valid.cols = marginal.probs[,1]>0 & marginal.probs[,1]<1; - not.valid = which(!valid.cols); - #save the list of the events to be dropped, if any - removed.events = NA; - removed.num = 0; - if(length(not.valid)>0) { - removed.events = vector(); - for(i in 1:length(not.valid)) { - removed.num = removed.num + 1; - removed.events[removed.num] = not.valid[i]; - if(verbose==TRUE) { - warning(paste("Event of column ",toString(not.valid[i])," is not valid and will be discarded.",sep="")); - } - } - } - #merge groups of events if they are not distinguishable - merged.events = NA; - merged.num = 0; - for(i in 1:ncol(dataset)) { - for(j in i:ncol(dataset)) { - #if the edge is valid, i.e., not self cause and not in removed.events - if(i!=j && (removed.num==0 || (any(removed.events==i)==FALSE && any(removed.events==j)==FALSE))) { - #if the two considered events are not distinguishable - if((joint.probs[i,j]/marginal.probs[i])==1 && (joint.probs[i,j]/marginal.probs[j])==1) { - #add the events to the merge list - if(merged.num==0) { - merged.events = vector(); - } - merged.num = merged.num + 1; - merged.events[merged.num] = i; - merged.num = merged.num + 1; - merged.events[merged.num] = j; - #add the second event to the drop list - if(removed.num==0) { - removed.events = vector(); - } - removed.num = removed.num + 1; - removed.events[removed.num] = j; - #merge the names in header of the data frame - names(dataset)[i] = paste(names(dataset)[i],"_and_",names(dataset)[j],sep=""); - names(dataset)[j] = names(dataset)[i]; - if(verbose==TRUE) { - warning(paste("Events of columns ",toString(i)," and ",toString(j)," are not distinguishable and they will be merged.",sep="")); - } - } - } - } - } - #save merged.events to an array - if(merged.num>0) { - merged.events = t(array(merged.events, dim=c(2,length(merged.events)/2))); - } - #save the valid dataset - if(removed.num>0) { - valid.dataset = dataset[,-removed.events]; - valid.marginal.probs = marginal.probs[-removed.events,]; - valid.marginal.probs = array(valid.marginal.probs, dim=c(length(valid.marginal.probs),1)); - valid.joint.probs = joint.probs[-removed.events,-removed.events]; - } - else { - valid.dataset = dataset; - valid.marginal.probs = marginal.probs; - valid.marginal.probs = array(valid.marginal.probs, dim=c(length(valid.marginal.probs),1)); - valid.joint.probs = joint.probs; - } - #if at this point I still have at least two events, the dataset is valid - if(length(ncol(valid.dataset))>0 && ncol(valid.dataset)>1) { - invalid.events = list(removed.events=removed.events,merged.events=merged.events); - valid.dataset = list(dataset=valid.dataset,invalid.events=invalid.events,marginal.probs=valid.marginal.probs,joint.probs=valid.joint.probs,is.valid=TRUE); - } - #if the dataset is not valid, we stop here - else { - valid.dataset = list(dataset=NA,invalid.events=NA,marginal.probs=NA,joint.probs=NA,is.valid=FALSE); - } - } - #if the dataset is not valid, we stop here - else { - if(verbose==TRUE) { - warning("The dataset must contain at least two binary events and two samples."); - } - valid.dataset = list(dataset=NA,invalid.events=NA,marginal.probs=NA,joint.probs=NA,is.valid=FALSE); - } - return(valid.dataset); -} diff --git a/R/check.events.R b/R/check.events.R deleted file mode 100644 index 64088c1f..00000000 --- a/R/check.events.R +++ /dev/null @@ -1,106 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -# Checks if events variable is well declared -check.events <- function(events, types, file.or.function = FALSE){ - - # if file.or.function is set TRUE the error messages are about - # the function that loads events from file - if(file.or.function) - msg <- "file" - else - msg <- "" - - # Looks for declaration duplicates - dup <- c() - # p flag is set if duplicate by name or type are found - p <- FALSE - i <- 1 - while(i <= nrow(events)){ - j <- 1 - while(j <= (i-1)){ - # Checks if an event is duplicated by name and type - if((events[j, "event"] == events[i, "event"]) && (events[j, "type"] == events[i, "type"])){ - dup <- union(dup, c(j)) - p <- TRUE - } - j <- j + 1 - } - i <- i + 1 - } - if(p){ - # Displays once duplicate messages - shown <- c("") - for(i in 1:length(dup)){ - if(! any(shown == events[dup[i], "event"])){ - warning(paste(toString(events[dup[i], "event"])," event redefined!", sep = ""), call. = FALSE) - shown <- union(shown, events[dup[i], "event"]) - } - } - } - - # k flag is set if more than one event is assigned to one column - dup.col <- c() - k <- FALSE - i <- 1 - while(i <= nrow(events)){ - j <- 1 - while(j <= (i-1)){ - if(events[j, "column"] == events[i, "column"]){ - dup.col <- union(dup.col, c(j)) - # If k flag is set warning messages is shown and the event will be discarded - k <- TRUE - } - j <- j + 1 - } - i <- i + 1 - } - if(k){ - for(i in 1:length(dup.col)) - #warning(paste("Event ", toString(events[dup[i], "event"]), " of type ", toString(events[dup[i], "type"]), " is redefined", sep = ""), call. = FALSE) - warning(paste("Event ", toString(events[dup.col[i], "event"]), " of type ", toString(events[dup.col[i], "type"]), " is in a column redefined below, row discarded", sep = ""), call. = FALSE) - - } - if(p || k){ - dup <- union(dup, dup.col) - events <- events[-1*dup,] - } - - - # If the types variable is found as it must, for each row this section checks the integrity of each column - # in the events variable - if(exists("types") && (length(types) > 0)){ - for(i in 1:nrow(events)){ - if(!search.type(toString(events[i, "type"]))) - warning(paste("Type ", toString(events[i,"type"]), " is not found!", sep = ""), call. = FALSE) - if(!is.wholenumber(events[i, "column"])) - stop(paste(toString(events[i,"event"])," column must be an integer value!", sep = ""), call. = FALSE) - if(events[i, "column"] < 0){ - e <- toString(events[i,"event"]) - events <- events[-1*i,] - stop(paste(e," column must be an integer positive value row discarded!", sep = ""), call. = FALSE) - } - } - }else - stop("types variable not found!", call. = FALSE) - - e <- events - row.names(e) <- c(1:nrow(e)) - return(e) - -} \ No newline at end of file diff --git a/R/check.types.R b/R/check.types.R deleted file mode 100644 index b9e59a65..00000000 --- a/R/check.types.R +++ /dev/null @@ -1,90 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -# Checks if types variable is well declared -check.types <- function(types, file.or.function = FALSE){ - - if(file.or.function) - msg <- "file" - else - msg <- "" - - # Looks for declaration duplicates - dup <- c() - # K flag is set if duplicate by type name are found - k <- FALSE - # P plag is set if duplicates color are found - p <- FALSE - - - i <- 1 - while(i <= nrow(types)){ - j <- 1 - while(j <= (i-1)){ - # Checks if a type is duplicated by type name - if((types[j, "type"] == types[i, "type"])){ - dup <- union(dup, c(j)) - k <- TRUE - } - j <- j + 1 - } - i <- i + 1 - } - - # If any duplicate is found the lastone is kept and a warning message is shown - if(k){ - types.dup <- types - types <- types[-1*dup,] - shown <- c("") - for(i in 1:length(dup)){ - if(! any(shown == types.dup[dup[i], "type"])){ - r <- match(types.dup[dup[i], "type"], types[,"type"]) - warning(paste("Event type ", - toString(types.dup[dup[i], "type"]), - " redefined, now has color: ", - toString(types[r,"color"]), - sep = ""), call. = FALSE) - - shown <- union(shown, types.dup[dup[i], "type"]) - } - } - } - - i <- 1 - while(i <= nrow(types)){ - j <- 1 - while(j <= (i-1)){ - # Checks if any set of type have the same color - if((types[j, "color"] == types[i, "color"]) && (types[j, "type"] != types[i, "type"])){ - p <- TRUE - } - j <- j + 1 - } - i <- i + 1 - } - # If there is more than one type with the same color declared a warning message is displayed - if(p) - warning("There are multiple events with the same color defined.", call. = FALSE) - - - # reset - t <- types - row.names(t) <- c(1:nrow(t)) - return(t) - -} \ No newline at end of file diff --git a/R/confidence.R b/R/confidence.R deleted file mode 100644 index 1bcb025f..00000000 --- a/R/confidence.R +++ /dev/null @@ -1,160 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -#' @import lattice -#' @export confidence.data.joint -#' @export confidence.fit.joint -#' @export confidence.data.single -#' @export confidence.fit.single -#' @export confidence.data.conditional -#' @export confidence.fit.conditional -#' @export confidence.single -#' @export confidence.joint -#' @export confidence.conditional -#' @name confidence -#' @title provides various kinds of confidence measures for an inferred progression model -#' @description -#' A set of functions to visualise and compare the probability of each event in the progression model, as well as their joint and conditional distributions. These can be evaluated both in the data (observed probabilities) and in the reconstructed model (fitted probabilities). -#' -#' @param topology A topology returned by the reconstruction algorithm -#' @usage confidence.data.joint(topology) -#' @details \code{confidence.data.joint} plot the pairwise observed joint probability of the events -confidence.data.joint <- function(topology){ - - if(missing(topology)) - stop("Missing parameter for confidence.data.joint function: confidence.data.joint(topology)", call. = FALSE) - # Displays the joint.probs slot of the topology variable - print(topology@joint.probs) - # Plots a "levelplot" for joint.probs slot in topology variable - levelplot(topology@joint.probs, xlab = "", ylab = "", scales = list(x = list(alternating = 2, rot = 90), tck = 0), main = "Data (joint probability)") -} - -#' @rdname confidence -#' @usage confidence.fit.joint(topology) -#' @details \code{confidence.fit.joint} plot the pairwise fitted joint probability of the events -confidence.fit.joint <- function(topology){ - if(missing(topology)) - stop("Missing parameter for confidence.fit.joint function: confidence.fit.joint(topology)", call. = FALSE) - print(topology@estimated.joint.probs) - levelplot(topology@joint.probs, xlab = "", ylab = "", scales = list(x = list(alternating = 2, rot = 90), tck = 0), main = "Fit (joint probability)") -} - -#' @rdname confidence -#' @usage confidence.data.single(topology) -#' @details \code{confidence.data.single} plot the observed probability of each event -confidence.data.single <- function(topology){ - if(missing(topology)) - stop("Missing parameter for confidence.data.single function: confidence.data.single(topology)", call. = FALSE) - # Gets names from the marginal.prob matrix header - names <- rownames(topology@marginal.probs) - v <- as.vector(topology@marginal.probs) - # Sets the name for each element in vector v - names(v) <- names - # Sets borders for the plot window - par("mar" = c((max(nchar(names))/1.6), 2, 2, 1), "mfrow" = c(1,1)) - barplot(v, las = 3, main = "Data (events probability)") - print(v) -} - -#' @rdname confidence -#' @usage confidence.fit.single(topology) -#' @details \code{confidence.fit.single} plot the fitted probability of each event -confidence.fit.single <- function(topology){ - if(missing(topology)) - stop("Missing parameter for confidence.fit.single function: confidence.fit.single(topology)", call. = FALSE) - names <- rownames(topology@estimated.marginal.probs) - v <- as.vector(topology@estimated.marginal.probs) - names(v) <- names - #Elimino i bordi attorno al plot - par("mar" = c((max(nchar(names))/1.6), 2, 2, 1), "mfrow" = c(1,1)) - barplot(v, las = 3, main = "Fit (events probability)") - print(v) -} - -#' @rdname confidence -#' @usage confidence.data.conditional(topology) -#' @details \code{confidence.data.conditional} plot the pairwise observed conditional probability of the events -confidence.data.conditional <- function(topology){ - if(missing(topology)) - stop("Missing parameter for confidence.data.conditional function: confidence.data.conditional(topology)", call. = FALSE) - names <- rownames(topology@cond.probs) - v <- as.vector(topology@cond.probs) - names(v) <- names - par("mar" = c((max(nchar(names))/1.6), 2, 2, 1), "mfrow" = c(1,1)) - barplot(v, las = 3, main = "Data (conditional probability)") - print(v) -} - -#' @rdname confidence -#' @usage confidence.fit.conditional(topology) -#' @details \code{confidence.fit.conditional} plot the pairwise fitted conditional probability of the events -confidence.fit.conditional <- function(topology){ - if(missing(topology)) - stop("Missing parameter for confidence.fit.conditional function: confidence.fit.conditional(topology)", call. = FALSE) - names <- rownames(topology@estimated.cond.probs) - v <- as.vector(topology@estimated.cond.probs) - names(v) <- names - par("mar" = c((max(nchar(names))/1.6), 2, 2, 1), "mfrow" = c(1,1)) - barplot(v, las = 3, main = "Fit (conditional probability)") - print(v) -} - -#' @rdname confidence -#' @usage confidence.single(topology) -#' @details \code{confidence.single} plot the difference between the observed and fitted probability of each event -confidence.single <- function(topology){ - if(missing(topology)) - stop("Missing parameter for confidence.single function: confidence.single(topology)", call. = FALSE) - data <- topology@marginal.probs - fit <- topology@estimated.marginal.probs - names <- rownames(data) - v <- as.vector(data - fit) - names(v) <- names - par("mar" = c((max(nchar(names))/1.6), 2, 1, 1), "mfrow" = c(1,1)) - barplot(v, las = 3, main = "Confidence (events probability)") - print(v) -} - -#' @rdname confidence -#' @usage confidence.joint(topology) -#' @details \code{confidence.joint} plot the pairwise difference between the observed and fitted joint probability of the events -confidence.joint <- function(topology){ - if(missing(topology)) - stop("Missing parameter for confidence.joint function: confidence.joint(topology)", call. = FALSE) - data <- topology@joint.probs - fit <- topology@estimated.joint.probs - v <- data - fit - print(v) - levelplot(v, xlab = "", ylab = "", scales = list(x = list(alternating = 2, rot = 90), tck = 0), main = "Confidence (joint probability)") -} - -#' @rdname confidence -#' @usage confidence.conditional(topology) -#' @details \code{confidence.conditional} plot the pairwise difference between the observed and fitted conditional probability of the events -confidence.conditional <- function(topology){ - if(missing(topology)) - stop("Missing parameter for confidence.conditional function: confidence.conditional(topology)", call. = FALSE) - data <- topology@cond.probs - fit <- topology@estimated.cond.probs - names <- rownames(data) - v <- as.vector(data - fit) - names(v) <- names - par("mar" = c((max(nchar(names))/1.6), 2, 2, 1), "mfrow" = c(1,1)) - barplot(v, las = 3, main = "Confidence (conditional probability)") - print(v) -} diff --git a/R/correctness.R b/R/correctness.R new file mode 100644 index 00000000..b91a349d --- /dev/null +++ b/R/correctness.R @@ -0,0 +1,123 @@ +################################################################################## +# # +# TRONCO: a tool for TRanslational ONCOlogy # +# # +################################################################################## +# Copyright (c) 2015, Marco Antoniotti, Giulio Caravagna, Luca De Sano, # +# Alex Graudenzi, Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, # +# Giancarlo Mauri, Bud Mishra and Daniele Ramazzotti. # +# # +# All rights reserved. This program and the accompanying materials # +# are made available under the terms of the GNU GPL v3.0 # +# which accompanies this distribution # +# # +################################################################################## + + +#' Check if 'x' is compliant with TRONCO's input: that is if it has dataframes +#' x$genotypes, x$annotations, x$types and x$stage (optional) +#' @title is.compliant +#' @param x A TRONCO compliant dataset. +#' @param err.fun string which identifies the function which called is.compliant +#' @param stage boolean flag to check x$stage datagframe +#' @return on error stops the computation +#' @export is.compliant +is.compliant = function(x, err.fun='[ERR]', stage = !(all(is.null(x$stages)) || all(is.na(x$stages)))) +{ + # Check if x is defined + if(is.null(x) || is.na(x)) + stop(paste(err.fun, ': input \'x\' is null.')) + + # Check then if x is a list + if(!is.list(x)) + stop(paste(err.fun, ': input \'x\' is not a list.')) + + # Check then if x has required fields + if(is.null(x$genotypes) || any(is.na(x$genotypes))) + stop(paste(err.fun, ': input \'x\' has no genotypes field.')) + else + if(!is.matrix(x$genotypes) && !is.data.frame(x$genotypes)) stop(paste(err.fun, ': attribute genotypes in \'x\' is not a matrix.')) + + colnames(x$annotations) = c('type', 'event') + colnames(x$types) = c('color') + + if(is.null(x$annotations) || any(is.na(x$annotations))) + stop(paste(err.fun, ': input \'x\' has no annotations field.')) + else + if(!is.matrix(x$annotations) && !is.data.frame(x$annotations)) stop(paste(err.fun, ': attribute annotations in \'x\' is not a matrix.')) + + if(is.null(x$types) || any(is.na(x$types))) + stop(paste(err.fun, ': input \'x\' has no types field.')) + else + if(!is.matrix(x$types) && !is.data.frame(x$types)) stop(paste(err.fun, ': attribute types in \'x\' is not a matrix.')) + + if(stage == TRUE && (is.null(x$stages) || all(is.na(x$stages)))) + stop(paste(err.fun, ': input \'x\' has no stage field.')) + else + if(stage == TRUE && !is.matrix(x$stages) && !is.data.frame(x$stages)) stop(paste(err.fun, ': attribute stage in \'x\' is not a matrix.')) + + # Annotations sould be present for all genotypes columns + if(nrow(x$annotations) != ncol(x$genotypes) ) + stop(paste(err.fun, ': input \'x\' has less annotations than expected.')) + if(!all(colnames(x$genotypes) == rownames(x$annotations))) + stop(paste(err.fun, ': input \'x\' has inconsistent annotations.')) + + # Types should be defined for every annotation + if(nrow(x$types) != length(unique(x$annotations[,1]))) + { + message('[ERROR] rownames(x$types):', paste(rownames(x$types), collapse=', ', sep='')) + message('[ERROR] Annotated event types:', paste(unique(x$annotations[,1]), collapse=', ', sep='')) + + stop(paste(err.fun, ': input \'x\' has less types than expected.')) + } + + if(!all(unique(x$annotations[,'type']) %in% rownames(x$types))) + { + stop(paste(err.fun, ': input \'x\' has inconsistent types (', + paste(unique(x$annotations[,'type']), collapse=',') + ,' vs ', paste(rownames(x$types), collapse=',') ,').', + sep='')) + } + + # Stage should be defined for every samples + if(stage == TRUE && nrow(x$stages) != nrow(x$genotypes)) + stop(paste(err.fun, ': input \'x\' has less stages than expected.')) + if(stage == TRUE && !all(rownames(x$stages) == rownames(x$genotypes))) + stop(paste(err.fun, ': input \'x\' has inconsistent stages.')) + + if(stage == TRUE) colnames(x$stages) = c('stage') + + dup = duplicated(x$annotations) + if(any(dup)) + { + cat("Duplicated events in \'x\': \n") + print(head(x$annotations[dup, ])) + stop('Duplicated events.') + } + } + + +#' Check if x is a valid TRONCO model +#' @title is.model +#' @param x A TRONCO compliant dataset. +is.model = function(x) +{ + if(!'model' %in% names(x)) + stop('Input object is not a TRONCO model.') +} + + +#' Check if y is a valid event list for x +#' @title is.events.list +#' @param x A TRONCO compliant dataset. +#' @param y A TRONCO event list +is.events.list = function(x, y) +{ + if(!is.matrix(y)) stop('Events should be given as matrix - see "as.events".') + if(ncol(y) != 2 || + !all(c('type', 'event') %in% colnames(y)) + ) stop('Events are missing column "type" (type of event) or "event" (gene symbol) - see "as.events".') + + if (!all(rownames(y) %in% colnames(x$genotypes))) + stop('Events rownames are not valid keys for genotypes - see "as.events".') +} diff --git a/R/data.R b/R/data.R new file mode 100644 index 00000000..07f32c92 --- /dev/null +++ b/R/data.R @@ -0,0 +1,124 @@ +################################################################################## +# # +# TRONCO: a tool for TRanslational ONCOlogy # +# # +################################################################################## +# Copyright (c) 2015, Marco Antoniotti, Giulio Caravagna, Luca De Sano, # +# Alex Graudenzi, Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, # +# Giancarlo Mauri, Bud Mishra and Daniele Ramazzotti. # +# # +# All rights reserved. This program and the accompanying materials # +# are made available under the terms of the GNU GPL v3.0 # +# which accompanies this distribution # +# # +################################################################################## + +#' @name maf +#' @title MAF example data +#' @description This dataset contains a standard MAF input for TRONCO +#' @docType data +#' @usage data(maf) +#' @format Manual Annotated Format +#' @source fake data +#' @author Luca De Sano +NULL + +#' @name gistic +#' @title GISTIC example data +#' @description This dataset contains a standard GISTIC input for TRONCO +#' @docType data +#' @usage data(gistic) +#' @format GISTIC score +#' @source fake data +#' @author Luca De Sano +NULL + +#' @name muts +#' @title Simple mutation dataset +#' @description A simple mutation dataset without hypotheses +#' @docType data +#' @usage data(muts) +#' @format TRONCO compliant dataset +#' @source fake data +#' @author Luca De Sano +NULL + +#' @name as.genotypes.test +#' @title as genotypes matrix +#' @description This data set list ... +#' @docType data +#' @usage data(as.genotypes.test) +#' @format matrix +#' @source da mettere +#' @author Luca De Sano +NULL + +#' @name as.events.test +#' @title as events matrix +#' @description This data set list ... +#' @docType data +#' @usage data(as.events.test) +#' @format matrix +#' @source fake data +#' @author Luca De Sano +NULL + +#' @name as.stages.test +#' @title as stages matrix +#' @description This data set list ... +#' @docType data +#' @usage data(as.stages.test) +#' @format matrix +#' @source fake data +#' @author Luca De Sano +NULL + +#' @name test_dataset_no_hypos +#' @title A complete dataset +#' @description This dataset contains a complete test dataset +#' @docType data +#' @usage data(test_dataset_no_hypos) +#' @format TRONCO compliant dataset +#' @source fake data +#' @author Luca De Sano +NULL + +#' @name test_dataset +#' @title A complete dataset with hypotheses +#' @description This dataset contains a complete test dataset +#' @docType data +#' @usage data(test_dataset) +#' @format TRONCO compliant dataset +#' @source fake data +#' @author Luca De Sano +NULL + +#' @name stage +#' @title Stage information for test_dataset +#' @description This dataset contains stage information for patient in test_dataset +#' @docType data +#' @usage data(stage) +#' @format Vector of stages +#' @source fake data +#' @author Luca De Sano +NULL + +#' @name test_model +#' @title A complete dataset with a reconstructed model +#' @description This dataset contains a model reconstructed with CAPRI +#' @docType data +#' @usage data(test_model) +#' @format TRONCO compliant dataset +#' @source fake data +#' @author Luca De Sano +NULL + +#' @name aCML +#' @title Atypical chronic myeloid leukemia dataset +#' @description This file contains a TRONCO compliant dataset +#' @docType data +#' @usage data(aCML) +#' @format TRONCO compliant dataset +#' @source data from http://www.nature.com/ng/journal/v45/n1/full/ng.2495.html +#' @author Luca De Sano +NULL diff --git a/R/data.load.R b/R/data.load.R deleted file mode 100644 index 07492ba9..00000000 --- a/R/data.load.R +++ /dev/null @@ -1,113 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -#' @export data.load -#' @title load a dataset (binary matrix) from a file or a preloaded dataset. -#' -#' @description -#' \code{data.load} sets a global data frame 'data.values' that contains the dataset loaded from an input file. -#' -#' @details -#' \code{data.load} loads a dataset from disk and associates all columns in the dataset to a specified event. Thus, types and events must be specified before calling this function to ensure a consistency check is performed on the input dataset (see \code{types.load}, \code{types.add}, \code{events.load}, \code{events.add} to load/add types/events). -#' -#' @param data.input The input file path. or a dataset loaded by \code{data} function -#' - -data.load <- function(data.input){ - - if(missing(data.input)) - stop("Missing parameter for data.load function: data.load(data.input)", call. = FALSE) - # To load data from the file linked by the given path the variables events and types must be already defined - if(exists("types") && exists("events") && (length(types) > 0) && (length(events) > 0)){ - events <- events - types <- types - - # The read.table function loads data in the data.values variable - # WARNING: warnings for this instruction are suppressed because they occurs even when - # the user do not leave a final return in the definitions file, we doesn't matter this. - if(!is.data.frame(data.input)) - data.values <- suppressWarnings(read.table(data.input, header = FALSE)) - else - data.values <- data.input - - # The number of elements found in events variable must be enough to make an association with the dataset columns. - if(ncol(data.values) > nrow(events)) stop("Events definition is not complete, not enough events defined!") - - # If an event declaration contains a number of column out of bound, an error message is displayed. - for(i in 1:nrow(events)) - if(events[i, "column"] > ncol(data.values)) - stop(paste("Found an event with column number greater than dataset colum number, event:", events[i, "event"])) - - column.names <- c() - - # Each event is associated to his column. - for(i in 1:ncol(data.values)){ - event <- search.event(i) - column.name <- c(paste(toString(event$event), " (", toString(event$type), ", column ", toString(event$column), ")", sep = "")) - column.names <- c(column.names, column.name) - } - - # check datasets - # colnames(data.values) <- column.names - tmpdata <- check.dataset(data.values, FALSE) - # Collects info for the object of class topology. - info <- infoset(tmpdata$invalid.events$merged.events, tmpdata$invalid.events$removed.events, verbose = TRUE) - assign("invalid.events", tmpdata$invalid.events , envir = .GlobalEnv) - data.values <- tmpdata$dataset - - num.events <- ncol(data.values) - num.samples <- nrow(data.values) - num.hypotheses <- 0 - - assign("num.events", num.events, envir = .GlobalEnv) - assign("num.samples", num.samples, envir = .GlobalEnv) - assign("num.hypotheses", num.hypotheses, envir = .GlobalEnv) - - if(!is.null(nrow(tmpdata$invalid.events$merged.events))){ - cat("\nThe available events are:\n") - events.label <- info$all.vis.labels - out.matrix <- events - - merged.type <- tmpdata$invalid.events$merged.events[,1] - out.matrix[merged.type,"type"] <- "merged" - - out.matrix <- out.matrix[-1*tmpdata$invalid.events$removed.events,] - - names <- cbind(info$all.vis.labels) - out.matrix[,"event"] <- names - - #columns numbers - out.matrix[, "column"] <- cbind(1:nrow(out.matrix)) - row.names(out.matrix) <- c(1:nrow(out.matrix)) - - print(out.matrix) - - } - - colnames(data.values) <- info$all.labels - assign("data.values", data.values, envir = .GlobalEnv) - if(!is.data.frame(data.input)) - cat(paste("Data successfully loaded and validated, dataset: ", toString(data.input),"\n", sep ="")) - else - cat("Data frame validated and data.values variable is loaded in global environment") - - } - else - stop("Events or types variable not found: Complete the definitions!") - -} diff --git a/R/decimal.to.binary.R b/R/decimal.to.binary.R deleted file mode 100644 index 847f313e..00000000 --- a/R/decimal.to.binary.R +++ /dev/null @@ -1,39 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -#convert an integer decimal number to binary -#INPUT: -#num.decimal: decimal integer to be converted -#num.bits: number of bits to be used -#RETURN: -#num.binary: binary conversion of num.decimal -"decimal.to.binary" <- -function(num.decimal, num.bits) { - #structure where to save the result - num.binary = rep(0,num.bits); - #convert the integer decimal number to binary - pos = 0; - while(num.decimal>0) { - #compute the value of the current step - num.binary[num.bits-pos] = num.decimal %% 2; - #divide the number by 2 for the next iteration - num.decimal = num.decimal %/% 2; - pos = pos + 1; - } - return(num.binary); -} diff --git a/R/editing.functions.R b/R/editing.functions.R new file mode 100644 index 00000000..7bbba4cc --- /dev/null +++ b/R/editing.functions.R @@ -0,0 +1,972 @@ +################################################################################## +# # +# TRONCO: a tool for TRanslational ONCOlogy # +# # +################################################################################## +# Copyright (c) 2015, Marco Antoniotti, Giulio Caravagna, Luca De Sano, # +# Alex Graudenzi, Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, # +# Giancarlo Mauri, Bud Mishra and Daniele Ramazzotti. # +# # +# All rights reserved. This program and the accompanying materials # +# are made available under the terms of the GNU GPL v3.0 # +# which accompanies this distribution # +# # +################################################################################## + +# internal function +# check issue #32 +consolidate.data = function(x, print = FALSE){ + is.compliant(x) + ind = list() + zeros = list() + ones = list() + + for(i in 1:nevents(x)) { + ev = list() + + for(j in i:nevents(x)) { + if(i != j && all(x$genotypes[, i] == x$genotypes[, j]) && !j %in% ind ) { + ev = append(ev, j) + } + } + + if(length(ev) > 0) { + ind.pool = rbind(as.events(x)[ c(i, unlist(ev)),]) + + if(all(x$genotypes[, i] == 1)) { + ones = append(ones, list(ind.pool)) + } + + if(all(x$genotypes[, i] == 0)) { + zeros = append(zeros, list(ind.pool)) + } + + if(sum(x$genotypes[, i] < nsamples(x))) { + ind = append(ind, list(ind.pool)) + } + + if(print){ + + if(all(x$genotypes[, i] == 1)) { + cat('\nEvents altered across all samples:\n') + } + if(all(x$genotypes[, i] == 0)) { + cat('\nEvents with no alterations across samples:\n') + } + + if(sum(x$genotypes[, i] < nsamples(x))) { + cat('\nIndistinguishable events:\n') + } + + print(ind.pool) + cat('Total number of events for these genes: ') + cat(paste(nevents(x, as.events(x, genes=c(ind.pool)))), '\n') + } + + } + } + + ret = NULL + ret$indistinguishable = ind + ret$zeroes = zeros + ret$ones = ones + + return(ret) +} + +#' Annotate a description on the selected dataset +#' @title annotate.description +#' +#' @examples +#' data(test_dataset) +#' annotate.description(test_dataset, 'new description') +#' +#' @param x A TRONCO compliant dataset. +#' @param label A string +#' @return A TRONCO compliant dataset. +#' @export annotate.description +annotate.description = function(x, label) +{ + if(as.description(x) != "") + warning(paste('Old description substituted: ', as.description(x), '.')) + + x$name = label + return(x) +} + +#' Annotate stage information on the selected dataset +#' @title annotate.stages +#' +#' @examples +#' data(test_dataset) +#' data(stage) +#' test_dataset = annotate.stages(test_dataset, stage) +#' as.stages(test_dataset) +#' +#' @param x A TRONCO compliant dataset. +#' @param stages A list of stages. Rownames must match samples list of x +#' @param match.TCGA.patients Match using TCGA notations (only first 12 characters) +#' @return A TRONCO compliant dataset. +#' @export annotate.stages +annotate.stages = function(x, stages, match.TCGA.patients = FALSE) +{ + if(is.null(rownames(stages))) { + stop('Stages have no rownames - will not add annotation.') + } + + samples = as.samples(x) + + # Just for temporary - will be shortened to make a simple check... + samples.temporary = samples + if(match.TCGA.patients) { + samples.temporary = substring(samples.temporary, 0, 12) + } + + if(!any(samples.temporary %in% rownames(stages))) + stop('There are no stages for samples in input dataset - will not add annotation.') + + # Notify if something gets lost + if(has.stages(x)) { + warning('Stages in input dataset overwritten.') + } + + # Actual stages + x$stages = data.frame(row.names = samples, stringsAsFactors=FALSE) + x$stages[, 'stage'] = as.character(NA) + + for(i in 1:nsamples(x)) + { + if(!match.TCGA.patients) { + x$stages[i, ] = as.character(stages[as.samples(x)[i], ]) + } else { + # Potential match if x's samples are long TCGA barcodes and stages are TCGA patients barcdodes (short) + short.name = substring(as.samples(x)[i], 0 , 12) + x$stages[i, 'stage'] = as.character(stages[short.name, ]) + } + } + + count.na = is.na(x$stages) + if(any(count.na)) { + warning(paste(length(which(count.na)), ' missing stages were added as NA.')) + } + + return(x) +} + +#' Change the color of an event type +#' @title change.color +#' +#' @examples +#' data(test_dataset) +#' dataset = change.color(test_dataset, 'ins_del', 'red') +#' +#' @param x A TRONCO compliant dataset. +#' @param type An event type +#' @param new.color The new color (either HEX or R Color) +#' @return A TRONCO complian dataset. +#' @export change.color +change.color = function(x, type, new.color) +{ + is.compliant(x) + if(type %in% as.types(x)) { + x$types[type, ] = new.color + } else { + stop('type: \"', type, '\" not in dataset') + } + + is.compliant(x) + return(x) +} + +#' Rename an event type +#' @title rename.type +#' +#' @examples +#' data(test_dataset) +#' test_dataset = rename.type(test_dataset, 'ins_del', 'deletion') +#' +#' @param x A TRONCO compliant dataset. +#' @param old.name The type of event to rename. +#' @param new.name The new name +#' @return A TRONCO complian dataset. +#' @export rename.type +rename.type <- function(x, old.name, new.name) { + is.compliant(x, 'rename.type: input dataset') + types = as.types(x) + + if(old.name == new.name) { + return(x) + } + + if (old.name %in% types) { + x$annotations[ which(x$annotations[,'type'] == old.name), 'type' ] = new.name + if(! new.name %in% types) { + rownames(x$types)[which(rownames(x$types) == old.name)] = new.name + } else { + x$types = x$types[!rownames(x$types) %in% list(old.name), , drop=F] + } + } else { + stop(paste(old.name, 'not in as.types(x)')) + } + cat('Events of type', old.name, 'renamed as', new.name, '.\n') + + is.compliant(x, err.fun = 'rename.type: output') + return(x) +} + +#' Rename a gene +#' @title rename.gene +#' +#' @examples +#' data(test_dataset) +#' test_dataset = rename.gene(test_dataset, 'TET2', 'gene x') +#' +#' @param x A TRONCO compliant dataset. +#' @param old.name The name of the gene to rename. +#' @param new.name The new name +#' @return A TRONCO complian dataset. +#' @export rename.gene +rename.gene <- function(x, old.name, new.name) { +# if is compliant x + is.compliant(x) + + if (old.name %in% as.genes(x)) { + x$annotations[ which(x$annotations[,'event'] == old.name), 'event' ] = new.name + + } else { + stop(paste(old.name, 'not in as.genes(x)')) + } + + is.compliant(x) + return(x) +} + +#' Delete an event type +#' @title delete.type +#' +#' @examples +#' data(test_dataset) +#' test_dataset = delete.type(test_dataset, 'Pattern') +#' +#' @param x A TRONCO compliant dataset. +#' @param type The name of the type to delete. +#' @return A TRONCO complian dataset. +#' @export delete.type +delete.type <- function(x, type) { +# if is compliant x + is.compliant(x) + + if(has.model(x)) { + stop("There's a reconstructed model, a type cannot be deleted now. \nUse delete.model()") + } + + for(pattern in as.patterns(x)) { + if(type %in% as.types.in.patterns(x, patterns=pattern)) { + stop('Found type \"', type, '\" in pattern \"', pattern, '\". Delete that pattern first.\n') + } + } + + if (type %in% as.types(x)) { + events = as.events(x, types=setdiff(as.types(x), type)) + + x$genotypes = as.genotypes(x)[,rownames(events), drop=F] + x$annotations = events + x$types = x$types[which(rownames(x$types) != type), , drop=F] + } else { + stop(paste(type, 'not in as.types(x)')) + } + + is.compliant(x) + return(x) +} + +#' Delete a gene +#' @title delete.gene +#' +#' @examples +#' data(test_dataset) +#' test_dataset = delete.gene(test_dataset, 'TET2') +#' +#' @param x A TRONCO compliant dataset. +#' @param gene The name of the gene to delete. +#' @return A TRONCO complian dataset. +#' @export delete.gene +delete.gene <- function(x, gene) { +# if is compliant x + is.compliant(x, 'delete:gene: input') + + if(has.model(x)) { + stop("There's a reconstructed model, a type cannot be deleted now. \nUse delete.model()") + } + + if (all(gene %in% as.genes(x))) { + + for(pattern in as.patterns(x)) { + for(g in gene) { + if(g %in% as.genes.in.patterns(x, patterns=pattern)) { + stop('Found gene \"', g, '\" in pattern \"', pattern, '\". Delete that pattern first.\n') + } + } + } + + + drops = rownames(as.events(x, genes = gene)) + x$genotypes = x$genotypes[, -which( colnames(x$genotypes) %in% drops )] + x$annotations = x$annotations[ -which (rownames(x$annotations) %in% drops), ] + + # TO DO: something better than this t(t(...)) + x$types = x$types[ which(rownames(x$types) %in% unique(x$annotations[,"type"])), , drop=FALSE] + colnames(x$types) = 'color' + + } else { + stop(paste(gene[!(gene %in% as.genes(x))], collapse= ','), 'are not in the dataset -- as.genes(x).') + } + + is.compliant(x, 'delete:gene: output') + return(x) +} + +#' Delete an event from the dataset +#' @title delete.event +#' +#' @examples +#' data(test_dataset) +#' test_dataset = delete.event(test_dataset, 'TET2', 'ins_del') +#' +#' @param x A TRONCO compliant dataset. +#' @param gene The name of the gene to delete. +#' @param type The name of the type to delete. +#' @return A TRONCO complian dataset. +#' @export delete.event +delete.event <- function(x, gene, type) { + + for(pattern in as.patterns(x)) { + events = as.events.in.patterns(x, patterns=pattern) + if(length(which(events[,'type'] == type & events[,'event'] == gene)) > 0) { + stop('Found event \"(', gene, ', ', type, ')\" in pattern \"', pattern, '\". Delete that pattern first.\n') + } + } + + is.compliant(x, 'delete.event: input') + + if (all(c(type, gene) %in% as.events(x))) + { + drops = rownames(as.events(x, genes = gene, types = type)) + x$genotypes = x$genotypes[, -which( colnames(x$genotypes) %in% drops ), drop = FALSE] + x$annotations = x$annotations[ -which (rownames(x$annotations) %in% drops), , drop = FALSE] + + # TO DO: something better than this t(t(...)) + x$types = x$types[ which(rownames(x$types) %in% unique(x$annotations[,"type"])), , drop=FALSE] + } + else { + stop(paste(type, gene, ' - not in as.events(x)')) + } + + is.compliant(x, 'delete:gene: output') + return(x) +} + +#' Delete an hypothesis from the dataset based on a selected event. +#' Check if the selected event exist in the dataset and delete his associated hypothesis +#' @title delete.hypothesis +#' +#' @examples +#' data(test_dataset) +#' delete.hypothesis(test_dataset, event='TET2') +#' delete.hypothesis(test_dataset, cause='EZH2') +#' delete.hypothesis(test_dataset, event='XOR_EZH2') +#' +#' @param x A TRONCO compliant dataset. +#' @param event Can be an event or pattern name +#' @param cause Can be an event or pattern name +#' @param effect Can be an event or pattern name +#' @return A TRONCO complian dataset. +#' @export delete.hypothesis +delete.hypothesis = function(x, event=NA, cause=NA, effect=NA) +{ + if(has.model(x)) { + stop("There's a reconstructed model, hypotheses cannot be deleted now. \nUse delete.model()") + } + + hypo_map = as.hypotheses(x) + to_remove = c() + if(!is.na(event)) { + if(length(event) == 1 && event %in% as.events(x)[,'event']) { + cause_del = which(hypo_map[,'cause event'] == event) + effect_del = which(hypo_map[,'effect event'] == event) + to_remove = unique(c(to_remove, cause_del, effect_del)) + } else { + stop('Select only one event present in as.events') + } + } + + if(! is.na(cause)) { + if(cause %in% as.events(x)[,'event']) { + cause_del = which(hypo_map[,'cause event'] == cause) + to_remove = unique(c(to_remove, cause_del)) + } else { + stop('Wrong cause, select only events present in as.events') + } + } + + if(! is.na(effect)){ + if( effect %in% as.events(x)[,'event']) { + effect_del = which(hypo_map[,'effect event'] == effect) + to_remove = unique(c(to_remove, effect_del)) + } else { + stop('Wrong effect, select only events present in as.events') + } + } + + x$hypotheses$num.hypotheses = x$hypotheses$num.hypotheses - 1 + x$hypotheses$hlist = x$hypotheses$hlist[-to_remove, ,drop=F] + + is.compliant(x) + + return(x) +} + +#' Delete a pattern and every associated hypotheses from the dataset +#' @title delete.pattern +#' +#' @examples +#' data(test_dataset) +#' delete.pattern(test_dataset, pattern='XOR_EZH2') +#' +#' @param x A TRONCO compliant dataset. +#' @param pattern A pattern name +#' @return A TRONCO complian dataset. +#' @export delete.pattern +delete.pattern = function(x, pattern) { + if(has.model(x)) { + stop("There's a reconstructed model, a pattern cannot be deleted now. \nUse delete.model()") + } + + if(! pattern %in% as.patterns(x)) { + stop(paste(pattern, " not in as.patterns()")) + } + + x = delete.hypothesis(x, pattern) + x$annotations = x$annotations[-which(rownames(x$annotations) == pattern), , drop=F] + x$genotypes = x$genotypes[, -which(colnames(x$genotypes) == pattern), drop=F] + x$hypotheses$patterns[pattern] = NULL + x$hypotheses$pvalues = NULL + + for(atom in names(x$hypotheses$atoms)) { + if(x$hypotheses$atoms[atom] == pattern) { + x$hypotheses$atoms[atom] = NULL + } + } + + rm(list=pattern, envir=x$hypotheses$hstructure) + +# chiave da togliere model$hypotheses$`gene x` se contenuto = 'XOR_EZH2' +# remove o rm? + + is.compliant(x) + + return(x) +} + +#' Delete a reconstructed model from the dataset +#' @title delete.model +#' +#' @examples +#' data(test_model) +#' model = delete.model(test_model) +#' has.model(model) +#' +#' @param x A TRONCO compliant dataset. +#' @return A TRONCO complian dataset. +#' @export delete.model +delete.model = function(x) { + if (! has.model(x)) { + stop("No model to delete in dataset") + } + x$model = NULL + x$confidence = NULL + x$parameters = NULL + x$adj.matrix.prima.facie = NULL + x$execution.time = NULL + + is.compliant(x) + + return(x) +} + +#' Delete samples from selected dataset +#' @title delete.samples +#' +#' @examples +#' data(test_dataset) +#' dataset = delete.samples(test_dataset, c('patient 1', 'patient 4')) +#' +#' @param x A TRONCO compliant dataset. +#' @param samples An array of samples name +#' @return A TRONCO complian dataset. +#' @export delete.samples +delete.samples = function(x, samples) { + is.compliant(x, 'delete.samples input') + stages = has.stages(x) + del = list() + actual.samples = as.samples(x) + samples = unique(samples) + for (sample in samples) { + if(!sample %in% actual.samples) { + warning('Sample: ', sample, ' not in as.samples(x)') + } else { + del = append(del, sample) + } + } + + x$genotypes = x$genotypes[!rownames(x$genotypes) %in% del, , drop = F ] + + if(stages) { + x$stages = x$stages[!rownames(x$stages) %in% del, , drop=FALSE] + } + + is.compliant(x, 'delete.samples output') + + return(x) +} + + +#' Intersect samples and events of two dataset +#' @title intersect.datasets +#' +#' @examples +#' data(test_dataset) +#' +#' @param x A TRONCO compliant dataset. +#' @param y A TRONCO compliant dataset. +#' @param intersect.genomes If False -> just samples +#' @return A TRONCO complian dataset. +#' @export intersect.datasets +intersect.datasets = function(x,y, intersect.genomes = TRUE) +{ + is.compliant(x) + is.compliant(y) + + # Common samples and genes (according to intersect.genomes) + samples = intersect(as.samples(x), as.samples(y)) + genes = ifelse(intersect.genomes, + intersect(as.genes(x), as.genes(y)), # intersect.genomes -> INTERSECTION + unique(c(as.genes(x), as.genes(y)))) # !intersect.genomes -> UNION + + report = data.frame(row.names = c('Samples', 'Genes')) + report$x = c(nsamples(x), ngenes(x)) + report$y = c(nsamples(y), ngenes(y)) + + # Restrict genes - only if intersect.genomes = T + if(intersect.genomes) { + x = events.selection(x, filter.in.names=genes) + y = events.selection(y, filter.in.names=genes) + } + + # TODO: check they have no events in common! + # if(as.events(x) ) + + # Restric stamples + x = samples.selection(x, samples) + y = samples.selection(y, samples) + + # Result + z = ebind(x,y) + + + cat('*** Intersect dataset [ intersect.genomes =', intersect.genomes, ']\n') + report$result = c(nsamples(z), ngenes(z)) + print(report) + + return(z) +} + + +#' Binds events from one or more datasets, which must be defined over the same set of samples. +#' @title ebind +#' +#' @param ... the input datasets +#' @return A TRONCO complian dataset. +#' @export ebind +ebind = function(...) +{ +# merge two datasets at a time. + events.pairwise.bind = function(x, y) + { + is.compliant(x, 'ebind: input x') + is.compliant(y, 'ebind: input y') + + samples.intersect = intersect(as.samples(x), as.samples(y)) + if(!(setequal(samples.intersect, as.samples(x)) && setequal(samples.intersect, as.samples(y)))) + stop('Datasets have different samples, won\'t bind!') + + z = list() + + y$genotypes = y$genotypes[rownames(x$genotypes), , drop = FALSE] + y$stages = y$stages[rownames(y$genotypes), , drop=FALSE] + + # Copy genotype matrix, and sets its rownames (samples) + z$genotypes = cbind(x$genotypes, y$genotypes) + colnames(z$genotypes) = paste('G', 1:ncol(z$genotypes), sep='') + + # Copy annotations for gene symbols etc. + z$annotations = rbind(x$annotations, y$annotations) + rownames(z$annotations) = colnames(z$genotypes) + + # Copy types + z$types = unique(rbind(x$types, y$types)) + + # Copy stages, if present + if(has.stages(x) && has.stages(y)) { + stages.x = as.stages(x) + stages.y = as.stages(y) + + stages.x = stages.x[!is.na(stages.x)] + stages.y = stages.y[!is.na(stages.y)] + + if(any(stages.x != stages.y)) { + stop('Patients have different stages, won\'t merge!') + } + } + + if(has.stages(x)) { + z = annotate.stages(z, as.stages(x)) + } + + is.compliant(z, 'ebind: output') + + return(z) + } + + + input = list(...) + + cat('*** Binding events for', length(input), 'datasets.\n') + return(Reduce(events.pairwise.bind, input)) +} + +#' Binds samples from one or more datasets, which must be defined over the same set of events +#' @title sbind +#' +#' @param ... the input datasets +#' @return A TRONCO complian dataset. +#' @export sbind +sbind = function(...) +{ +# merge two datasets at a time. + samples.pairwise.bind = function(x, y) { + is.compliant(x, 'sbind: input x') + is.compliant(y, 'sbind: input y') + + if(!all(as.events(x) == as.events(y))) { + stop('Datasets have different events, can not bind!') + } + z = list() + + # Copy genotypes and annotations + z$genotypes = rbind(x$genotypes, y$genotypes) + z$annotations = x$annotations + z$types = x$types + + # Copy stages, if present + if(has.stages(x) || has.stages(y)) + { + if(has.stages(x)) xstages = as.stages(x) + else xstages = matrix(rep('NA', nsamples(x)), nrow=nsamples(x)) + + if(has.stages(y)) ystages = as.stages(y) + else ystages = matrix(rep('NA', nsamples(y)), nrow=nsamples(y)) + + z$stages = (rbind(x$stages, y$stages)) + + colnames(z$stages) = 'stage' + } + is.compliant(z, 'sbind: output') + + return(z) + } + + input = list(...) + return(Reduce(samples.pairwise.bind, input)) +} + +#' For an input dataset merge all the events of two or more distincit types +#' (e.g., say that missense and indel mutations are events +#' of a unique "mutation" type) +#' @title merge.types +#' +#' @examples +#' data(test_dataset_no_hypos) +#' merge.types(test_dataset_no_hypos, 'ins_del', 'missense_point_mutations') +#' merge.types(test_dataset_no_hypos, 'ins_del', 'missense_point_mutations', new.type='mut', new.color='green') +#' +#' @param x A TRONCO compliant dataset. +#' @param ... type to merge +#' @param new.type label for the new type to create +#' @param new.color color for the new type to create +#' @return A TRONCO compliant dataset. +#' @export merge.types +merge.types = function(x, ..., new.type = "new.type", new.color = "khaki") { + + # check if x is compliant + is.compliant(x) + + input = list(...) + + # TODO Change this in a better way (deafult ellipsis?) + if (length(input) == 1 && is.null(input[[1]])) { + input = as.list(as.types(x)) + } + + cat(paste("*** Aggregating events of type(s) {", paste(unlist(input), collapse = ", ", sep = ""), "}\nin a unique event with label \"", new.type, "\".\n", sep = "")) + + + if (length(input) <= 1) { + cat("One input type provided, using renaming functions.\n") + + x = rename.type(x, input[[1]], new.type) + x = change.color(x, new.type, new.color) + return(x) + } + + + types.check = lapply(input, function(type) { + type %in% as.types(x) + }) + + if (!all(unlist(types.check))) { + t = (input[!unlist(types.check)]) + + stop("No events of type '", t, "' in input dataset, will not merge.") + } + + if (any(duplicated(input))) { + stop("No duplicated types are allowed, will not merge.") + } + + if (new.type %in% as.types(x)) { + stop(paste0(new.type, "is already used in input dataset, will not merge")) + } + + input = unlist(input) + + + + genes = as.genes(x, types = input) + cat("Dropping event types", paste(input, collapse = ", ", sep = ""), "for", length(genes), "genes.\n") + geno.matrix = matrix(, nrow = nsamples(x), ncol = length(genes)) + + if(!exists('hide.progress.bar') || !hide.progress.bar) { + pb = txtProgressBar(1, length(genes), style = 3) + flush.console() + } + + + for (i in 1:length(genes)) { + if(!exists('hide.progress.bar') || !hide.progress.bar) { + setTxtProgressBar(pb, i) + } + + geno = as.matrix(rowSums(as.gene(x, genes[i], types = input))) + geno[geno > 1] = 1 + + geno.matrix[, i] = geno + } + + + rownames(geno.matrix) = as.samples(x) + colnames(geno.matrix) = genes + if(!exists('hide.progress.bar') || !hide.progress.bar) { + close(pb) + } + + + z = import.genotypes(geno.matrix, event.type = new.type, color = new.color) + if (has.stages(x)) { + z = annotate.stages(z, as.stages(x)) + } + + + y = x + for(i in input) { + y = delete.type(y, i) + } + + w = ebind(y, z) + is.compliant(w) + + return(w) + +} + +#' Deletes all events which have frequency 0 in the dataset. +#' @title trim +#' +#' @examples +#' data(test_dataset) +#' test_dataset = trim(test_dataset) +#' +#' @param x A TRONCO compliant dataset. +#' @return A TRONCO compliant dataset. +#' @export trim +trim = function(x) { + is.compliant(x, 'trim: input') + + x = enforce.numeric(x) + + del = names(which(colSums(x$genotypes) == 0)) + + x$genotypes = x$genotypes[, !colnames(x$genotypes) %in% del, drop = FALSE] + x$annotations = x$annotations[!rownames(x$annotations) %in% del, , drop = FALSE] + + x$types = matrix(x$types[ unique(x$annotations[, 'type', drop = FALSE]), , drop = FALSE ], ncol=1) + rownames(x$types) = unique(x$annotations[,'type', drop = FALSE]) + colnames(x$types) = 'color' + + + is.compliant(x, 'trim: output') + return(x) +} + +# TODO: check +#' Split cohort (samples) into groups, return either all groups or a specific group. +#' @title ssplit +#' +#' @param x A TRONCO compliant dataset. +#' @param clusters A list of clusters. Rownames must match samples list of x +#' @param idx ID of a specific group present in stages. If NA all groups will be extracted +#' @return A TRONCO compliant dataset. +#' @export ssplit +ssplit <- function(x, clusters, idx=NA) +{ + is.compliant(x) + + data = x$genotypes + + cat('*** Splitting cohort into groups.\n') + + # TODO: check that clusters has at least 2 columns + + # Check that map has correct size + if(nsamples(x) != nrow(clusters)) + stop(paste("Error: cannot split, number of samples (", nsamples(x) , + ") and groups (", nrow(clusters),") do not match.", sep='')); + + # Check it is an actual map + if(!all(rownames(clusters) %in% rownames(data))) + stop(paste('Error: samples', paste( + rownames(clusters)[!rownames(clusters) %in% as.samples(x)], + collapse=', ', sep='') ,'are not assigned to a group', sep='')); + + # Groups info + cluster.labels = unique(clusters) + num.clusters = nrow(cluster.labels) + + # Extract a specific group + if(!is.na(idx)) + { + y = list() + samples.in.cluster = rownames(clusters)[clusters == idx] + + cat(paste('Group \"', idx, '\" has ', length(samples.in.cluster), + ' samples, returning this group.\n', sep='')) + + y$genotypes = data[samples.in.cluster, ]; + y$annotations = x$annotations + y$types = x$types + + if(!is.null(x$stages)) + { + y$stages = as.matrix(x$stages[samples.in.cluster, ]) + rownames(y$stages) = samples.in.cluster + } + + is.compliant(y, 'ssplit.split with index') + return(y) + } + + # Extract all groups + partitions = list() + for (i in 1:num.clusters) + { + y = list() + samples.in.cluster = rownames(clusters)[clusters == cluster.labels[i,1]] + + cat(paste('Group \"', cluster.labels[i,1], '\" has ', length(samples.in.cluster), + ' samples.\n', sep='')) + + y$genotypes = data[samples.in.cluster, ]; + y$annotations = x$annotations + y$types = x$types + + if(!is.null(x$stages)) { + y$stages = as.matrix(x$stages[samples.in.cluster, ]) + rownames(y$stages) = samples.in.cluster + } + + is.compliant(y, 'subtypes.split partitionig') + partitions = append(partitions, list(y)) + names(partitions)[i] = cluster.labels[i,1] + } + + return(partitions) +} + +# Split events into groups according to their types. +# +# x: cohort +# @export +tsplit <- function(x) +{ +# Parametro per estrarre un tipo solo di evento? +} + +#' Merge a list of events in an unique event +#' @title merge.events +#' +#' @examples +#' data(muts) +#' dataset = merge.events(muts, 'G1', 'G2', new.event='test', new.type='banana', event.color='yellow') +#' +#' @param x A TRONCO compliant dataset. +#' @param ... A list of events to merge +#' @param new.event The name of the resultant event +#' @param new.type The type of the new event +#' @param event.color The color of the new event +#' @return A TRONCO compliant dataset. +#' @export merge.events +merge.events = function(x, ..., new.event, new.type, event.color) +{ + + events = list(...) + + if(length(events) < 2) { + stop('ERR - badformed events') + } + + for(pattern in as.patterns(x)) { + if(any(events %in% rownames(as.events.in.patterns(x, patterns=pattern)))) { + stop('Found event in pattern \"', pattern, '\". Delete that pattern first.\n') + } + } + + x = enforce.numeric(x) + + y = x + as_ev = as.events(x) + print(events) + for(event in events) { + y = delete.event(y, gene = as_ev[event, 'event'], type = as_ev[event, 'type']) + } + + genos = x$genotypes[, unlist(events)] + genos = matrix(rowSums(genos), nrow = nsamples(x)) + genos[genos > 1] = 1 + colnames(genos) = new.event + rownames(genos) = as.samples(x) + + genos = import.genotypes(genos, event.type = new.type, color = event.color) + y = ebind(y, genos) + + return(y) +} diff --git a/R/estimate.tree.error.rates.R b/R/estimate.tree.error.rates.R deleted file mode 100644 index fc50a7de..00000000 --- a/R/estimate.tree.error.rates.R +++ /dev/null @@ -1,44 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -#estimate the error rates by "L-BFGS-B" optimization in terms of L2-error -#INPUT: -#marginal.probs: marginal probabilities -#joint.probs: joint probabilities -#parents.pos: which event is the parent? 0 if none, a number otherwise -#RETURN: -#estimated.error.rates: estimated probabilities, false positive and false negative error rates -"estimate.tree.error.rates" <- -function(marginal.probs,joint.probs,parents.pos) { - #function to be optimized by "L-BFGS-B" optimization in terms of L2-error - f.estimation <- function(errors) { - #set the current error rates with the starting point of the optimization being (e_pos,e_neg) = 0 - error.rates = list(error.fp=errors[1],error.fn=errors[2]); - #estimate the observed probabilities given the error rates - estimated.probs = estimate.tree.probs(marginal.probs,joint.probs,parents.pos,error.rates); - #evaluate the goodness of the estimatione by L2-error on the estimated marginal and joint probabilities - error.estimation = sum((marginal.probs-estimated.probs$marginal.probs)^2)+sum((joint.probs-estimated.probs$joint.probs)^2); - return(error.estimation); - } - #the estimation is performed as in Byrd et al (1995) - #this method allows for box constraints, i.e., each variable can be given a lower and/or upper bound - estimated.error.rates = optim(c(0.00,0.00),f.estimation,method="L-BFGS-B",lower=c(0.00,0.00),upper=c(0.49,0.49))$par; - #structure to save the results - estimated.error.rates = list(error.fp=estimated.error.rates[1],error.fn=estimated.error.rates[2]); - return(estimated.error.rates); -} diff --git a/R/estimate.tree.joint.probs.R b/R/estimate.tree.joint.probs.R deleted file mode 100644 index a769ce76..00000000 --- a/R/estimate.tree.joint.probs.R +++ /dev/null @@ -1,127 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -#estimate the theoretical joint probability of two given nodes given the reconstructed topology -#INPUT: -#first.node: first node -#second.node: second node -#parents.pos: which event is the parent? 0 if none, a number otherwise -#marginal.probs: marginal probabilities -#conditional.probs: conditional probabilities -#RETURN: -#estimated.tree.joint.probs: estimated theoretical joint probability -"estimate.tree.joint.probs" <- -function(first.node,second.node,parents.pos,marginal.probs,conditional.probs) { - #if the two nodes are roots - if(parents.pos[first.node]==-1 && parents.pos[second.node]==-1) { - estimated.tree.joint.probs = marginal.probs[first.node,1]*marginal.probs[second.node,1]; - } - #if the two nodes are the same node - else if(first.node==second.node) { - estimated.tree.joint.probs = marginal.probs[first.node,1]; - } - #otherwise - else { - #go through the parents starting from the two nodes to find if they are directly connected - #are the two nodes in the same path? - is.path = 0; - #check if first.node is an ancestor of second.node - curr.first = first.node; - curr.second = second.node; - while(parents.pos[curr.second]!=-1) { - if(curr.first==curr.second) { - is.path = 1; - is.child = curr.second; - break; - } - curr.second = parents.pos[curr.second]; - } - if(is.path==0) { - #check if second.node is an ancestor of first.node - curr.first = first.node; - curr.second = second.node; - while(parents.pos[curr.first]!=-1) { - if(curr.first==curr.second) { - is.path = 1; - is.child = curr.first; - break; - } - curr.first = parents.pos[curr.first]; - } - } - #check if the two nodes are at least connected - #are the two nodes connected? - is.connected = 0; - if(is.path==0) { - curr.first = first.node; - curr.second = second.node; - while(parents.pos[curr.first]!=-1) { - while(parents.pos[curr.second]!=-1) { - if(curr.first==curr.second) { - is.connected = 1; - is.ancestor = curr.first; - break; - } - else { - curr.second = parents.pos[curr.second]; - } - } - if(is.connected==0) { - curr.first = parents.pos[curr.first]; - curr.second = second.node; - } - else { - break; - } - } - } - #now I can set the joint probabilities - #in this case the two nodes are directly connected - #P(child,parent)_estimate = P(child); - if(is.path==1) { - estimated.tree.joint.probs = marginal.probs[is.child,1]; - } - #in this case the two nodes are indirectly connected - #P(i,j)_estimate = P(ancestor)_estimate * P_PATH(ancestor->first.node)_estimate * P_PATH(ancestor->second.node)_estimate - else if(is.connected==1) { - #P(ancestor)_estimat - estimated.tree.joint.probs = marginal.probs[is.ancestor,1]; - #P_PATH(ancestor->first.node)_estimate - first.path = 1; - curr.first = first.node; - while(parents.pos[curr.first]!=is.ancestor) { - first.path = first.path * conditional.probs[curr.first,1]; - curr.first = parents.pos[curr.first]; - } - #P_PATH(ancestor->second.node)_estimate - second.path = 1; - curr.second = second.node; - while(parents.pos[curr.second]!=is.ancestor) { - second.path = second.path * conditional.probs[curr.second,1]; - curr.second = parents.pos[curr.second]; - } - estimated.tree.joint.probs = estimated.tree.joint.probs * first.path * second.path; - } - #in this case the two nodes are not connected - #P(i,j)_estimate = P(i)_estimate * P(j)_estimate - else { - estimated.tree.joint.probs = marginal.probs[first.node,1]*marginal.probs[second.node,1]; - } - } - return(estimated.tree.joint.probs); -} diff --git a/R/estimate.tree.probs.R b/R/estimate.tree.probs.R deleted file mode 100644 index 1e7e1946..00000000 --- a/R/estimate.tree.probs.R +++ /dev/null @@ -1,156 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -#estimate the marginal, joint and conditional probabilities given the reconstructed topology and the error rates -#INPUT: -#marginal.probs: observed marginal probabilities -#joint.probs: observed joint probabilities -#parents.pos: position of the parents in the list of nodes -#error.rates: rates for the false positive and the false negative errors -#RETURN: -#estimated.probs: estimated marginal, joint and conditional probabilities -"estimate.tree.probs" <- -function(marginal.probs,joint.probs,parents.pos,error.rates) { - #structure where to save the probabilities to be estimated - estimated.marginal.probs = array(-1, dim=c(nrow(marginal.probs),1)); - estimated.joint.probs = array(-1, dim=c(nrow(marginal.probs),nrow(marginal.probs))); - estimated.conditional.probs = array(-1, dim=c(nrow(marginal.probs),1)); - #estimate the theoretical conditional probabilities given the error rates - #this estimation is performed by applying the error rates to the marginal and joint probabilities - theoretical.conditional.probs = array(-1, dim=c(nrow(marginal.probs),1)); - for (i in 1:nrow(theoretical.conditional.probs)) { - #if the node has a parent, use the error rates to compute the conditional probability - #if the node has no parent, its conditional probability is not considered - if(parents.pos[i,1]!=-1) { - #P(i|j)_theoretical = ((P(i,j)_obs-e_p*(P(j)_obs+P(i)_obs)+e_p^2)/(1-e_n-e_p)^2)/((P(j)_obs-e_p)/(1-e_n-e_p)) - theoretical.conditional.probs[i,1] = (joint.probs[i,parents.pos[i,1]]-error.rates$error.fp*(marginal.probs[parents.pos[i,1],1]+marginal.probs[i,1])+error.rates$error.fp^2)/((marginal.probs[parents.pos[i,1],1]-error.rates$error.fp)*(1-error.rates$error.fn-error.rates$error.fp)); - if(theoretical.conditional.probs[i,1]<0 || theoretical.conditional.probs[i,1]>1) { - #invalid theoretical conditional probability - if(theoretical.conditional.probs[i,1]<0) { - theoretical.conditional.probs[i,1] = 0; - } - else { - theoretical.conditional.probs[i,1] = 1; - } - } - } - } - #estimate the marginal observed probabilities - #this estimation is performed by applying the topological constraints on the probabilities and then the error rates - #I do not have any constraint on the nodes without a parent - child.list <- which(parents.pos==-1); - estimated.marginal.probs[child.list,1] = marginal.probs[child.list,1]; - estimated.marginal.probs.with.error = array(-1, dim=c(nrow(marginal.probs),1)); - estimated.marginal.probs.with.error[child.list,1] = estimated.marginal.probs[child.list,1]; - visited = length(child.list); - #I do not have any constraint for the joint probabilities on the pair of nodes which are the roots of the tree/forest - estimated.joint = array(0, dim=c(nrow(marginal.probs),nrow(marginal.probs))); - for (i in child.list) { - for (j in child.list) { - if(i!=j) { - estimated.joint.probs[i,j] = joint.probs[i,j]; - estimated.joint[i,j] = -1; - } - } - } - #visit the nodes with a parent in topological order - while (visited < nrow(estimated.marginal.probs)) { - #set the new child list - new.child = vector(); - #go through the current parents - for (node in child.list) { - #set the new children - curr.child <- which(parents.pos==node); - #go through the current children - for (child in curr.child) { - #set the marginal probability for this node - #P(child)_estimate = P(parent)_estimate * P(child|parent)_theoretical - estimated.marginal.probs[child,1] = estimated.marginal.probs[parents.pos[child,1],1]*theoretical.conditional.probs[child,1]; - visited = visited + 1; - #P(child,parent)_estimare = P(child)_estimate; - estimated.joint.probs[child,parents.pos[child,1]] = estimated.marginal.probs[child,1]; - estimated.joint[child,parents.pos[child,1]] = 1; - estimated.joint.probs[parents.pos[child,1],child] = estimated.marginal.probs[child,1]; - estimated.joint[parents.pos[child,1],child] = 1; - #apply the error rates to the marginal probabilities - #P(i)_obs_estimate = P(i)_estimate*(1-e_n) + P(not i)_estimate*e_p - estimated.marginal.probs.with.error[child,1] = error.rates$error.fp+(1-error.rates$error.fn-error.rates$error.fp)*estimated.marginal.probs[child,1]; - if(estimated.marginal.probs.with.error[child,1]<0 || estimated.marginal.probs.with.error[child,1]>1) { - #invalid estimated observed probability - if(estimated.marginal.probs.with.error[child,1]<0) { - estimated.marginal.probs.with.error[child,1] = 0; - } - else { - estimated.marginal.probs.with.error[child,1] = 1; - } - } - } - new.child <- c(new.child,curr.child); - } - #set the next child list - child.list = new.child; - } - diag(estimated.joint.probs) = estimated.marginal.probs; - diag(estimated.joint) = -1; - #given the estimated observed probabilities, I can now also estimate the joint probabilities by applying the topological constraints and then the error rates - for (i in 1:nrow(estimated.joint.probs)) { - for (j in i:nrow(estimated.joint.probs)) { - #if I still need to estimate this joint probability - if(estimated.joint[i,j]==0) { - estimated.joint.probs[i,j] = estimate.tree.joint.probs(i,j,parents.pos,estimated.marginal.probs,theoretical.conditional.probs); - estimated.joint[i,j] = 1; - } - #now I can apply the error rates to estimate the observed joint probabilities - if(estimated.joint[i,j]==1) { - #P(i,j)_obs_estimate = P(i,j)_estimate*(1-e_n)^2+P(not i,j)_estimate*e_p*(1-e_n)+P(i,not j)_estimate*(1-e_n)*e_p+P(not i,not j)_estimate*e_p^2; - estimated.joint.probs[i,j] = estimated.joint.probs[i,j]*((1-error.rates$error.fn-error.rates$error.fp)^2)+error.rates$error.fp*(estimated.marginal.probs[i,1]+estimated.marginal.probs[j,1])-error.rates$error.fp^2; - #invalid estimated joint probability - if(estimated.joint.probs[i,j]<0 || estimated.joint.probs[i,j]>min(estimated.marginal.probs.with.error[i,1],estimated.marginal.probs.with.error[j,1])) { - if(estimated.joint.probs[i,j]<0) { - estimated.joint.probs[i,j] = 0; - } - else { - estimated.joint.probs[i,j] = min(estimated.marginal.probs.with.error[i,1],estimated.marginal.probs.with.error[j,1]); - } - } - estimated.joint.probs[j,i] = estimated.joint.probs[i,j]; - } - } - } - #save the estimated probabilities - estimated.marginal.probs = estimated.marginal.probs.with.error; - #given the estimated observed and joint probabilities, I can finally compute the conditional probabilities - #P(child|parent)_obs_estimate = P(parent,child)_obs_estimate/P(parent)_obs_estimate - for (i in 1:nrow(estimated.conditional.probs)) { - if(parents.pos[i,1]!=-1) { - if(estimated.marginal.probs[parents.pos[i,1],1]>0) { - estimated.conditional.probs[i,1] = estimated.joint.probs[parents.pos[i,1],i]/estimated.marginal.probs[parents.pos[i,1],1]; - } - else { - estimated.conditional.probs[i,1] = 0; - } - } - #if the node has no parent, its conditional probability is set to 1 - else { - estimated.conditional.probs[i,1] = 1; - } - } - #structure to save the results - estimated.probs = list(marginal.probs=estimated.marginal.probs,joint.probs=estimated.joint.probs,conditional.probs=estimated.conditional.probs); - return(estimated.probs); -} diff --git a/R/estimate.tree.samples.R b/R/estimate.tree.samples.R deleted file mode 100644 index 9f6f0717..00000000 --- a/R/estimate.tree.samples.R +++ /dev/null @@ -1,156 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -#estimate the probability of observing each sample in the dataset given the reconstructed topology -#INPUT: -#dataset: a valid dataset -#reconstructed.topology: the reconstructed topology -#estimated.marginal.probabilities: estimated marginal probabilities of the events -#estimated.conditional.probabilities: estimated conditional probabilities of the events -#error.rates: error rates for false positives and false negatives -#RETURN: -#probabilities: probability of each sample -"estimate.tree.samples" <- function(dataset, reconstructed.topology, estimated.marginal.probabilities, estimated.conditional.probabilities, error.rates) { - #structure where to save the probabilities of the samples - probabilities = array(-1,c(nrow(dataset),1)); - #topological properties: - #1. tree number - #2. parent - #3. level in the tree - topology.structure = array(0,c(nrow(reconstructed.topology),3)); - #go through the subtrees within the topology of four - tree.count = 0; - for (i in 1:nrow(reconstructed.topology)) { - #if node i has no parents, it is a root - if(length(which(reconstructed.topology[,i]==1))==0) { - tree.count = tree.count + 1; - level = 1; - #set the parameters for the root - topology.structure[i,1] = tree.count; - topology.structure[i,2] = -1; - topology.structure[i,3] = level; - curr.node = i; - #go through this tree - while (length(curr.node)>0) { - #move to the next level - level = level + 1; - new.node = vector(); - for (j in 1:length(curr.node)) { - curr.new.node = which(reconstructed.topology[curr.node[j],]==1); - if(length(curr.new.node)>0) { - new.node = c(new.node,curr.new.node); - for (k in 1:length(curr.new.node)) { - #number of the current subtree - topology.structure[curr.new.node[k],1] = tree.count; - #parent of the current node - topology.structure[curr.new.node[k],2] = curr.node[j]; - #level of this node - topology.structure[curr.new.node[k],3] = level; - } - } - } - curr.node = new.node; - } - } - } - #go through the dataset and evalutate the probability of each sample - for (i in 1:nrow(dataset)) { - sample.probability = 1; - for (j in 1:tree.count) { - #probability of this subtree (without any knowledge, I set it to 1) - curr.sample.probability = 1; - #entries referring to this subtree - curr.entry = which(topology.structure[,1]==j); - #samples of each element of this subtree - curr.sample = dataset[i,curr.entry]; - #parents of each element of this subtree - curr.parents = topology.structure[curr.entry,2]; - #level of each element of this subtree - curr.levels = topology.structure[curr.entry,3]; - #set the probability as the one of the root of this tree - curr.sample.probability = curr.sample.probability * estimated.marginal.probabilities[curr.entry[which(curr.levels==1,arr.ind=TRUE)],1]; - #set the maximum level of this subtree - max.level = curr.levels[which.max(curr.levels)]; - #if I have at least one event in this sample - if(length(curr.sample[curr.sample==1])>0) { - #visit the nodes starting from the lower level - is.valid = TRUE; - for (k in max.level:1) { - curr.level.nodes = which(curr.levels==k,arr.ind=TRUE); - #if I'm not on a root - if(k>1) { - curr.level.samples = curr.sample[curr.level.nodes]; - #if I have at least one event at this level - if(length(curr.level.samples[curr.level.samples==1])>0) { - #I can not have a child without its parent - curr.level.parent = curr.parents[curr.level.nodes]; - for (p in 1:length(curr.level.parent)) { - if(dataset[i,curr.level.parent[p]]==0 && dataset[i,curr.entry[curr.level.nodes[p]]]==1) { - is.valid = FALSE; - break; - } - } - } - #if the sample is valid - if(is.valid==TRUE) { - #add the probability of each edge - curr.level.parent = curr.parents[curr.level.nodes]; - for (p in 1:length(curr.level.parent)) { - if(dataset[i,curr.level.parent[p]]==1 && dataset[i,curr.entry[curr.level.nodes[p]]]==0) { - curr.sample.probability = curr.sample.probability * (1 - estimated.conditional.probabilities[curr.entry[curr.level.nodes[p]],1]); - } - else if(dataset[i,curr.level.parent[p]]==1 && dataset[i,curr.entry[curr.level.nodes[p]]]==1) { - curr.sample.probability = curr.sample.probability * estimated.conditional.probabilities[curr.entry[curr.level.nodes[p]],1]; - } - } - } - } - if(is.valid==FALSE) { - curr.sample.probability = 0; - break; - } - } - if(is.valid==FALSE) { - sample.probability = 0; - break; - } - } - #if this sample has no events for this tree - else { - curr.sample.probability = 1 - curr.sample.probability; - } - #update the probability of the topology with the one of this sample - sample.probability = sample.probability * curr.sample.probability; - if(sample.probability==0) { - break; - } - } - probabilities[i,1] = sample.probability; - } - #correct the estimation by the error rates - errors.matrix <- array(0,c(nrow(probabilities),nrow(dataset))); - for (i in 1:nrow(probabilities)) { - for (j in 1:nrow(dataset)) { - curr.sample.x = as.numeric(dataset[i,]); - curr.sample.y = as.numeric(dataset[j,]); - errors.matrix[i,j] = (1-error.rates$error.fp)^((1-curr.sample.x)%*%(1-curr.sample.y))*error.rates$error.fp^((1-curr.sample.x)%*%curr.sample.y)*(1-error.rates$error.fn)^(curr.sample.x%*%curr.sample.y)*error.rates$error.fn^(curr.sample.x%*%(1-curr.sample.y)); - } - } - probabilities[,1] = as.numeric(as.vector(probabilities)%*%errors.matrix); - return(probabilities); -} diff --git a/R/events.R b/R/events.R deleted file mode 100644 index ff885d9e..00000000 --- a/R/events.R +++ /dev/null @@ -1,10 +0,0 @@ -#' @name events -#' @title Events collection for Ovarian cancer CGH data -#' @description -#' This example contains a collection of events associeted to the -#' Ovarian cancer CGH dataset -#' @docType data -# @usage events.load("data/events.txt") -#' @format -#' An example with 7 events -NULL \ No newline at end of file diff --git a/R/events.add.R b/R/events.add.R deleted file mode 100644 index 391a1777..00000000 --- a/R/events.add.R +++ /dev/null @@ -1,83 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -#' @export events.add -#' @title add a new event (e.g., a missense point mutation for EGFR) -#' -#' @description -#' \code{events.add} sets a global data frame 'events' that contains all the events defined. Events can be added and refined incrementally, in any order. -#' -#' @details -#' \code{events.add} allows to define one event at a time. If the event was previously defined, its definition is updated to keep track of its last definition. A consistency check is performed to ensure that the type of defined event is valid. Thus, types must be defined before events are loaded (see \code{types.add}, \code{types.load}). -#' -#' @param event.name The event label(e.g., 'EGFR') . All event labels are strings. -#' @param type.name The type name of this event (e.g., 'missense point'). Type names must refer to types loaded before adding an event, a consistency check raises an error if the type name is unknown. -#' @param column.number The dataset column to which this event is associated. Column number must be an -#' integer positive value. -#' @examples -#' types.add("gain", "red") -#' events.add("8q+", "gain", 1) -#' -events.add <- function(event.name, type.name, column.number = NA){ - - if(missing(event.name) || missing(type.name) || missing(column.number)) - stop("Missing parameter for events.add function: events.add(event.name, type.name, column.number)") - - # Types must be defined before the event definition - if(exists("types") && (length(types) > 0)){ - types <- types - - # Column number has to be a valid number - if(!is.na(column.number)){ - - # Column number has to be an integer number - if(is.wholenumber(column.number)){ - - # Each number given in input from the console is considered to be a "numeric" value. - c <- as.integer(column.number) - - # If a global events variable is found, the new definition is queued to the definitions found. - if(exists("events") && (length(events) > 0)){ - events <- events - events.local <- rbind(events, data.frame(event = event.name, type = type.name, column = c, stringsAsFactors = FALSE)) - } - else - events.local <- data.frame(event = event.name, type = type.name, column = c, stringsAsFactors = FALSE) - - - # The user is free to leave spaces between each element in the definition, definitions file is more clear this way. - events.local <- trim.events(events.local) - - # The check function perform consistency and correctness checks. - events.local <- check.events(events.local, types, FALSE) - - cat(paste("Added event \"", event.name , "\" of type \"", type.name, "\" (color: \"", - toString(search.type.info(type.name)[,"color"]),"\"), dataset column \"", column.number,"\"\n", sep ="")) - - assign("events", events.local, envir = .GlobalEnv) - - } - else - stop("Column must be an integer value!", call. = FALSE) - } - else - stop("Cannot create an event without assigning it a column number.", call. = FALSE) - } - else - stop("types variable not defined!", call. = FALSE) -} \ No newline at end of file diff --git a/R/events.load.R b/R/events.load.R deleted file mode 100644 index 58745a10..00000000 --- a/R/events.load.R +++ /dev/null @@ -1,100 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -#' @export events.load -#' @title load a set of events from file -#' -#' @description -#' \code{events.load} sets a global data frame 'events' that contains all event definitions found in a specified file or dataset to be validated. This is a way to automatise calls to function \code{events.add} for a bunch of events. -#' -#' @details -#' \code{events.load} load a set of events from a given file. The input file must be structured as a CSV file, where each event is defined on a separate line in the format: eventName, typeName, columnNumber. -#' -#' @seealso \code{\link{events.add}} -#' @param data.input The input file path or a dataset to be validated. -#' -events.load <- function(data.input){ - - err <- "" - message <- "The definition file contains errors!" - - if(missing(data.input)) - stop("Missing parameter for events.load function: events.load(data.input)", call. = FALSE) - - # Types must be defined before the event definition - if(exists("types") && (length(types) > 0)){ - - types <- types - # If a global events variable is found, the new definition is queued to the definitions found. - if(exists("events") && (length(events) > 0)){ - events <- events - - if(is.data.frame(data.input)) - events.file <- data.input - else{ - # If the pathname is correct - if(file.exists(data.input)){ - # Definition file may contain error such as the lack of a comma or columns, a try-catch manages this. - err <- tryCatch( events.file <- suppressWarnings(read.table(data.input, sep = ",", col.names = c("event", "type", "column"), stringsAsFactors = FALSE)), - error = function(e) err <- message) - if(toString(err) == message) - stop(err, call. = FALSE) - }else - stop("File not found!", call. = FALSE) - } - if(nrow(events.file) > 0) - # The new definitions are queued. - events.local <- rbind(events, events.file) - else - stop("Empty set of events at input file path or dataset!", call. = FALSE) - } - else{ - if(is.data.frame(data.input)) - events.local <- data.input - else{ - # If the pathname is correct - if(file.exists(data.input)){ - err <- tryCatch( events.local <- suppressWarnings(read.table(data.input, sep = ",", col.names = c("event", "type", "column"), stringsAsFactors = FALSE)), - error = function(e) err <- message) - if(toString(err) == message) - stop(err, call. = FALSE) - }else - stop("File not found!", call. = FALSE) - } - if(nrow(events.local) == 0) - stop("Empty set of events at input file pathor dataset!", call. = FALSE) - } - - # The user is free to leave spaces between each element in the definition, definitions file is more clear this way. - events.local <- trim.events(events.local) - - - # The check function perform consistency and correctness checks. - events.local <- check.events(events.local, types, TRUE) - - - for(i in 1:nrow(events.local)) - cat(paste("Added event \"", events.local[i, "event"] , "\" of type \"", events.local[i, "type"], "\" (color: \"", - toString(search.type.info(events.local[i, "type"])[,"color"]),"\"), dataset column \"", events.local[i, "column"],"\"\n", sep ="")) - - assign("events", events.local, envir = .GlobalEnv) - - }else - stop("Types not defined!", call. = FALSE) - -} \ No newline at end of file diff --git a/R/external.R b/R/external.R new file mode 100644 index 00000000..ef10d31e --- /dev/null +++ b/R/external.R @@ -0,0 +1,409 @@ +################################################################################## +# # +# TRONCO: a tool for TRanslational ONCOlogy # +# # +################################################################################## +# Copyright (c) 2015, Marco Antoniotti, Giulio Caravagna, Luca De Sano, # +# Alex Graudenzi, Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, # +# Giancarlo Mauri, Bud Mishra and Daniele Ramazzotti. # +# # +# All rights reserved. This program and the accompanying materials # +# are made available under the terms of the GNU GPL v3.0 # +# which accompanies this distribution # +# # +################################################################################## + +#' Create an input file for MUTEX +#' (ref: https://code.google.com/p/mutex/ ) +#' @title export,mutex +#' +#' @examples +#' data(gistic) +#' dataset = import.GISTIC(gistic) +#' export.mutex(dataset) +#' +#' @param x A TRONCO compliant dataset. +#' @param filename The name of the file +#' @param filepath The path where to save the file +#' @param label.mutation The event type to use as mutation +#' @param label.amplification The event type to use as amplification (can be a list) +#' @param label.deletion The event type to use as amplification (can be a list) +#' @return A MUTEX example matrix +#' @export export.mutex +export.mutex = function(x, + filename = 'tronco_to_mutex', + filepath = './', + label.mutation = 'SNV', + label.amplification = list('High-level Gain'), + label.deletion = list('Homozygous Loss')) +{ + + is.compliant(x) + data = x + alteration = list(unlist(label.mutation), unlist(label.amplification), unlist(label.deletion)) + + # merge amplification + if (length(label.amplification) >= 0) { + amplification = label.amplification[[1]] + } + if (length(label.amplification) >= 2) { + amplification = 'amplification' + data = union.types(data, label.amplification[[1]], label.amplification[[2]], 'amplification', 'red') + } + if (length(label.amplification) > 2) { + for (label in label.amplification[3:length(label.amplification)]) { + data = union.types(data, label, 'amplification', 'amplification', 'red') + } + } + + # merge deletion + if (length(label.deletion) >= 0) { + deletion = label.deletion[[1]] + } + if (length(label.deletion) >= 2) { + deletion = 'deletion' + data = union.types(data, label.deletion[[1]], label.deletion[[2]], 'deletion', 'blue') + } + if (length(label.deletion) > 2) { + for (label in label.deletion[3:length(label.deletion)]) { + data = union.types(data, label, 'deletion', 'deletion', 'blue') + } + } + + # merge mutation + if (length(label.mutation) >= 0) { + mutation = label.mutation[[1]] + } + if (length(label.mutation) >= 2) { + mutation = 'mutation' + data = union.types(data, label.mutation[[1]], label.mutation[[2]], 'mutation', 'green') + } + if (length(label.mutation) > 2) { + for (label in label.mutation[3:length(label.mutation)]) { + data = union.types(data, label, 'mutation', 'mutation', 'green') + } + } + + samples = rownames(data$genotypes) + genes = unique(data$annotation[,'event']) + + mutex.matrix = matrix(0, nrow = length(genes), ncol = length(samples)) + colnames(mutex.matrix) = samples + rownames(mutex.matrix) = genes + + # legend: + # 0: no alteration + # 1: mutation + # 2: amplification + # 4: deletion + # 3: 1+2 a+m + # 5: 1+4 d+m + + legend = list(1, 2, 4) + names(legend) = list(mutation, amplification, deletion) + tronco.matrix = data$genotypes + + for(sample in rownames(tronco.matrix)) { + for(gene in colnames(tronco.matrix)) { + type = data$annotations[[gene, 'type']] + + if(type %in% alteration && tronco.matrix[sample, gene] == 1) { + to.add = legend[[data$annotations[[gene, 'type']]]] + actual.value = mutex.matrix[data$annotations[[gene, 'event']], sample] + mutex.matrix[data$annotations[[gene, 'event']], sample] = actual.value + to.add + } + } + } + + # reassign value according to mutex notation + + # legend: + # 0: no alteration + # 1: mutation + # 2: amplification + # 3: deletion + # 4: 1+2 a+m + # 5: 1+4 d+m + + # move a+m to 10 + mutex.matrix[which(mutex.matrix == 3)] = 10 + + # move deletion to 3 + mutex.matrix[which(mutex.matrix == 4)] = 3 + + # move a+m to 4 + mutex.matrix[which(mutex.matrix == 10)] = 4 + + mutex.header = append("Symbol", samples) + + filepath = if(grepl("\\/$", filepath)) filepath else paste0(filepath, "/") + con = paste0(filepath, filename) + write(mutex.header, file=con, sep = "\t", ncolumns = length(mutex.header)) + write.table(mutex.matrix, con, sep="\t", append=T, col.names = F, quote = F) + + return(mutex.matrix) +} + +#' Create a .mat file which can be used with NBS clustering +#' (ref: http://chianti.ucsd.edu/~mhofree/wordpress/?page_id=26) +#' @title export.nbs.input +#' +#' @param x A TRONCO compliant dataset. +#' @param map_hugo_entrez Hugo_Symbol-Entrez_Gene_Id map +#' @param file output file name +#' @export export.nbs.input +export.nbs.input = function(x, + map_hugo_entrez, + file = 'tronco_to_nbs.mat') +{ + + is.compliant(x); + + cat('*** Exporting for NBS v. 0.2\n') + cat('Preparing binary input matrix\n') + + # gene_indiv_mat <- the matrix + gene_indiv_mat = as.matrix(x$genotypes) + + # remove colnames and rownames from gene_indiv_mat + rownames(gene_indiv_mat) = NULL + colnames(gene_indiv_mat) = NULL + + cat('Preparing samples IDs \n') + + # sample_id <- patient id + sample_id = as.samples(x) + + cat('Preparing genes list (should be Hugo_Symbol) \n') + + # gene_id_symbol <- sorted name of events + gene_id_symbol = as.genes(x) + + cat('Preparing genes map (should be Hugo_Symbol -> Entrez_Gene_Id) \n') + + if(!('Hugo_Symbol' %in% colnames(map_hugo_entrez))) { + stop('No Hugo_Symbol column in the input map: ', colnames(map_hugo_entrez)) + } + if(!('Entrez_Gene_Id' %in% colnames(map_hugo_entrez))) { + stop('No Entrez_Gene_Id column in the input map: ', colnames(map_hugo_entrez)) + } + + gene_id_all = mapply(function(x) as.numeric(map_hugo_entrez[[which(map_hugo_entrez[,'Hugo_Symbol'] == x), 'Entrez_Gene_Id']]), gene_id_symbol) + + file = if(grepl("\\.mat$", file)) file else paste0(file, ".mat") + con = paste0(file) + + cat('Writing Matlab file to disk:', file, ' ..... ' ) + writeMat(con, gene_indiv_mat = gene_indiv_mat, gene_id_all = gene_id_all, sample_id = sample_id, gene_id_symbol = gene_id_symbol) + cat('DONE') +} + +#' Create a list of unique Mutex groups for a given fdr cutoff +#' current Mutex version is Jan 8, 2015 +#' (ref: https://code.google.com/p/mutex/ ) +#' +#' @title import.mutex.groups +#' @param file Mutex results ("ranked-groups.txt" file) +#' @param fdr cutoff for fdr +#' @param display print summary table of extracted groups +#' @export +import.mutex.groups = function(file, fdr=.2, display = TRUE) +{ + # Found somewhere on the web - makes sense + read.irregular <- function(filenm) + { + fileID <- file(filenm,open="rt") + nFields <- count.fields(fileID) + mat <- matrix(nrow=length(nFields),ncol=max(nFields)) + invisible(seek(fileID,where=0,origin="start",rw="read")) + for(i in 1:nrow(mat) ) { + mat[i,1:nFields[i]] <-scan(fileID,what="",nlines=1,quiet=TRUE) + } + close(fileID) + df <- data.frame(mat, stringsAsFactors=FALSE) + return(df) + } + + x = read.irregular(file) + + # Check header + if(any(x[1,1:3] != c('Score', 'q-val', 'Members'))) + warning('File header does not seem to contain \'Score\', \'q-val\' and \'Members field\' - are you + sure this is a Mutex result file?' ) + + # Remove header + cat(paste('*** Groups extracted - ', (nrow(x) -1), ' total groups.\n', sep='')) + x = x[-1, , drop = F] # this is c('Score', 'q-val', 'Members') + x[, 1] = as.numeric(x[,1]) # fdr + x[, 2] = as.numeric(x[,2]) # q-value + + # remove groups with low fdr + res = x[which(x[,1] < fdr), , drop = F] + + # remove duplicated groups (permutations) + res.g = res[, 3:ncol(res)] + + for(i in 1:nrow(res.g)) res[i,3:ncol(res)] = sort(res.g[i,], na.last = T) + res = res[!duplicated((res[,3:ncol(res), drop=FALSE])), ] + + cat(paste('Selected ', nrow(res), ' unique groups with fdr < ', fdr, '\n', sep='')) + + # Create groups + groups = function(g) { + g = g[3:length(g)] + g = g[!is.na(g)] + names(g) = NULL + return(sort(g)) + } + + G = list() + for(i in 1:nrow(res)) + { + gr = list(groups(res[i, ])) + names(gr) = paste('MUTEX_GROUP', i, sep='') + G = append(G,gr) + } + + rownames(res) = names(G) + colnames(res)[1:2] = c('fdr', 'score') + + # Summary report + if(display) + { + print(res) + + } + return(G) +} + +#' Check if there are multiple sample in x, according to TCGA barcodes naming +#' @title TCGA.multiple.samples +#' +#' @param x A TRONCO compliant dataset. +#' @return A list of barcodes. NA if no duplicated barcode is found +#' @export TCGA.multiple.samples +TCGA.multiple.samples = function(x) +{ + is.compliant(x) + + samples = as.samples(x) + samples.truncated = substring(samples, 0, 12) + + patients = unique(samples.truncated) + + if(length(patients) != nsamples(x)) + { + dup.samples.start = which(duplicated(samples.truncated)) + dup.samples.last = which(duplicated(samples.truncated, fromLast = T)) + + return(sort(samples[c(dup.samples.start, dup.samples.last)])) + } + else return(NA) +} + +#' If there are multiple sample in x, according to TCGA barcodes naming, remove them +#' @title TCGA.remove.multiple.samples +#' +#' @param x A TRONCO compliant dataset. +#' @return A TRONCO compliant dataset +#' @export TCGA.remove.multiple.samples +TCGA.remove.multiple.samples = function(x) +{ + is.compliant(x, err.fun='Removing TCGA multiple samples (input)') + + dup = TCGA.multiple.samples(x) + dup.truncated = substring(dup, 0, 12) + patients = unique(dup.truncated) + + for(i in 1:length(patients)) + { + patients.samples = which(dup.truncated == patients[i]) + multiple.samples = dup[patients.samples] + + cat('Patient', patients[i], 'with sample aliquotes\n' ) + print(substring(multiple.samples, 14, 29)) + + keep = max(multiple.samples) + discard = multiple.samples[which(multiple.samples != keep)] + + cat('Selecting', keep, '\n') + x = delete.samples(x, discard) + } + + is.compliant(x, err.fun='Removing TCGA multiple samples (output)') + return(x) +} + +#' Keep only the first 12 character of samples barcode if there are no duplicates +#' @title TCGA.shorten.barcodes +#' +#' @param x A TRONCO compliant dataset. +#' @return A TRONCO compliant dataset +#' @export TCGA.shorten.barcodes +TCGA.shorten.barcodes = function(x) +{ + is.compliant(x, err.fun='Shartening TCGA barcodes (input)') + + # Check if it has duplicated barcodes + if(!all(is.na(TCGA.multiple.samples(x)))) + stop( + paste('This dataset contains multiple samples for some patients - cannot consolidate.', + '\n Samples with barcodes indicating multiple patients: \n', paste(TCGA.multiple.samples(x), collapse = '\n'), '.' + , sep ='')) + + # Shorten sample barcodes + rownames(x$genotypes) = substring(rownames(x$genotypes), 0, 12) + if(has.stages(x)) rownames(x$stages) = rownames(x$genotypes) + + is.compliant(x, err.fun='Shartening TCGA barcodes (output)') + return(x) +} + +#' TODO +#' @title TCGA.map.clinical.data +#' +#' @param file TODO +#' @param sep file delimiter +#' @param column.samples TODO +#' @param column.map TODO +#' @return a map +#' @export TCGA.map.clinical.data +TCGA.map.clinical.data = function(file, sep='\t', column.samples, column.map) +{ + + data = read.delim( + file = file, + sep = sep, + header = TRUE, + stringsAsFactors=F) + + if(!(column.samples %in% colnames(data))) + stop(paste('Cannot find samples column \"', column.samples, '\". Available columns: \n\t', + paste(colnames(data), collapse='\n\t'), sep = '')) + + if(!(column.map %in% colnames(data))) + stop(paste('Cannot find required map column \"', column.map, '\". Available columns: \n\t', + paste(colnames(data), collapse='\n\t'), sep = '')) + + map = data.frame(data[, column.map], row.names = data[, column.samples]) + colnames(map) = column.map + + return(map) +} + +# internal function +sample.RColorBrewer.colors = function(palette, ncolors) +{ + if(!palette %in% rownames(brewer.pal.info)) stop('Invalid RColorBrewer palette.') + + pmax.cols = brewer.pal.info[palette, 'maxcolors'] + + cols = min(pmax.cols , ncolors) + cols = ifelse(cols < 3, 3, cols) + + colors = brewer.pal(n=cols, name=palette) + if(ncolors < 3) colors = colors[1:ncolors] + else colors = colorRampPalette(colors)(ncolors) + + return(colors) +} diff --git a/R/get.tree.parents.R b/R/get.tree.parents.R deleted file mode 100644 index c7cfd9c5..00000000 --- a/R/get.tree.parents.R +++ /dev/null @@ -1,80 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -#select at the most one parent for each node based on the probability raising criteria -#INPUT: -#marginal.probs: observed marginal probabilities -#joint.probs: observed joint probabilities -#lambda: shrinkage parameter (value between 0 and 1) -#RETURN: -#best.parents: list of the best parents -"get.tree.parents" <- -function(marginal.probs,joint.probs,lambda) { - #compute the scores for each edge - scores = get.tree.scores(marginal.probs,joint.probs,lambda); - pr.score = scores$pr.score; - #set to -1 the scores where there is no causation according to Suppes' condition - #[i,j] means i is causing j - for (i in 1:ncol(pr.score)) { - for (j in i:ncol(pr.score)) { - #the diagonal has not to be considered (no self-cause) - if(i==j) { - pr.score[i,j] = -1; - } - #otherwise, apply Suppes's criteria for prima facie cause - else { - #if both the scores are not greater then 0, they are not valid - #in this case the events are causally irrelevant, i.e., independent - if(pr.score[i,j]<=0 && pr.score[j,i]<=0) { - pr.score[i,j] = -1; - pr.score[j,i] = -1; - } - #if at least one score is greater then 0, I keep the greater one - #in this way I give a (time) direction to the progression - #furthermore, this constrain the topology to be acyclic by construction - else { - if(pr.score[i,j]>pr.score[j,i]) { - pr.score[j,i] = -1; - } - else { - pr.score[i,j] = -1; - } - } - } - } - } - #chose at the most one parent per node - #here I suppose that each node has a parent - #spurious causes are considered (and removed) later - best.parents = array(-1, dim=c(ncol(pr.score),1)); - for (i in 1:ncol(pr.score)) { - #-1 means that the best parent is the Root - curr.best = -1; - #find the best parent for the current node - best = which.max(pr.score[,i]); - if(pr.score[best,i]>0) { - curr.best = best; - } - #set the best parent for the current node - best.parents[i,1] = curr.best; - } - #check for spurious causes by the independent progression filter and complete the parents list - parents = verify.parents(best.parents,marginal.probs,joint.probs); - best.parents = list(parents=parents,marginal.probs=marginal.probs,joint.probs=joint.probs,pr.score=scores$pr.score); - return(best.parents); -} diff --git a/R/get.tree.scores.R b/R/get.tree.scores.R deleted file mode 100644 index b2952acf..00000000 --- a/R/get.tree.scores.R +++ /dev/null @@ -1,44 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -#compute the probability raising based scores -#INPUT: -#marginal.probs: observed marginal probabilities -#joint.probs: observed joint probabilities -#lambda: shrinkage parameter (value between 0 and 1) -#RETURN: -#scores: probability raising based scores -"get.tree.scores" <- -function(marginal.probs,joint.probs,lambda) { - #structure where to save the probability raising scores - pr.score = array(-1, dim=c(nrow(marginal.probs),nrow(marginal.probs))); - #compute the probability raising based scores - for (i in 1:ncol(pr.score)) { - for (j in 1:ncol(pr.score)) { - #alpha is the probability raising model of causation (raw model estimate) - alpha = ((joint.probs[i,j]/marginal.probs[i])-((marginal.probs[j]-joint.probs[i,j])/(1-marginal.probs[i])))/((joint.probs[i,j]/marginal.probs[i])+((marginal.probs[j]-joint.probs[i,j])/(1-marginal.probs[i]))); - #beta is the correction factor (based on time distance in terms of statistical dependence) - beta = (joint.probs[i,j]-marginal.probs[i]*marginal.probs[j])/(joint.probs[i,j]+marginal.probs[i]*marginal.probs[j]); - #the overall estimator is a shrinkage-like combination of alpha and beta - #the scores are saved in the convention used for an ajacency matrix, i.e. [i,j] means causal edge i-->j - pr.score[i,j] = (1-lambda)*alpha + lambda*beta; - } - } - scores = list(marginal.probs=marginal.probs,joint.probs=joint.probs,pr.score=pr.score); - return(scores); -} diff --git a/R/infoset.R b/R/infoset.R deleted file mode 100644 index 7967ccb9..00000000 --- a/R/infoset.R +++ /dev/null @@ -1,99 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -infoset <- function(merged.columns, deleted.column, verbose = FALSE){ - - events <- events - # Gets all useful info for each node and returns a matrix that - # contains color and label to be assigned to each event. - get.all.info <- function(events, types){ - all.vis.labels <- c() - all.labels <- c() - colors <- c() - for(i in 1:nrow(events)){ - - # For the event in column i look for color, label name for computation - # and label good to graph drawing - color <- search.type.info(search.event(i)$type)$color - label <- paste(toString(search.event(i)$event), toString(search.event(i)$type), sep=":") - label.vis <- toString(search.event(i)$event) - - if(is.null(color)) - stop(paste("For the event \"", search.event(i)$event, "\" no color can be associated to the given type, check the types definition!", sep = ""), call. = FALSE) - # Load - colors <- c(colors, color) - all.vis.labels <- c(all.vis.labels, label.vis) - all.labels <- c(all.labels, label) - } - - all.info <- list() - all.info$all.labels <- all.labels - all.info$all.vis.labels <- all.vis.labels - all.info$colors <- colors - return(all.info) - - } - - all.info <- get.all.info(events, types) - - # Events are merged as merged.columns indicates. - if(length(merged.columns) > 0 && !is.na(merged.columns)){ - - for(i in 1:nrow(merged.columns)){ - - merged <- merged.columns[i,1] - with <- merged.columns[i,2] - - # all.labels contains all the "computations label", which are composed by event_name:type, - # this turns the label into a key for the computation process - all.info$all.labels[merged] <- paste(all.info$all.labels[merged], all.info$all.labels[with], sep = " - ") - - # all.vis.labels cotains all labels to be assigned to each node in the final plot - all.info$all.vis.labels[merged] <- paste(all.info$all.vis.labels[merged], all.info$all.vis.labels[with], sep = " - ") - - if(verbose) - cat(paste("Column ", toString(merged), " event: ", search.event(merged)$event, " merged with column ", - toString(with), " event: ", search.event(with)$event, " ", search.event(deleted.column[i])$eventcolor, - "lightyellow color", " is assigned\n", sep = "")) - - all.info$colors[merged] <- "lightyellow" - - - } - if(! search.type("merged")) - types <- rbind(types, data.frame(type = "merged", color = "lightyellow", stringsAsFactors = FALSE)) - assign("types", types, envir = .GlobalEnv) - } - - # Deletes columns if some column number is in deleted.column vector. - # WARINIG: All columns merged and useless are already put in the deleted.column vetor by CAPRESE algorithm - if(length(deleted.column) > 0 && !is.na(deleted.column[1])){ - all.info$all.labels <- all.info$all.labels[-1*deleted.column] - all.info$all.vis.labels <- all.info$all.vis.labels[-1*deleted.column] - all.info$colors <- all.info$colors[-1*deleted.column] - if(verbose) - for(i in 1:length(deleted.column)) - cat(paste("Column ", toString(deleted.column[i]), " type: ", - search.event(deleted.column[i])$type, " event: ", - search.event(deleted.column[i])$event, - " deleted\n", sep = "")) - } - - return(all.info) - -} \ No newline at end of file diff --git a/R/is.wholenumber.R b/R/is.wholenumber.R deleted file mode 100644 index fc37abd7..00000000 --- a/R/is.wholenumber.R +++ /dev/null @@ -1,23 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -# is.wholenumber returns TRUE if the given number is a numeric value without -# decimal numbers after the comma. -is.wholenumber <- function(x, tol = .Machine$double.eps^0.5){ - return(abs(x - round(x)) < tol) -} \ No newline at end of file diff --git a/R/loading.R b/R/loading.R new file mode 100644 index 00000000..812f2455 --- /dev/null +++ b/R/loading.R @@ -0,0 +1,517 @@ +################################################################################## +# # +# TRONCO: a tool for TRanslational ONCOlogy # +# # +################################################################################## +# Copyright (c) 2015, Marco Antoniotti, Giulio Caravagna, Luca De Sano, # +# Alex Graudenzi, Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, # +# Giancarlo Mauri, Bud Mishra and Daniele Ramazzotti. # +# # +# All rights reserved. This program and the accompanying materials # +# are made available under the terms of the GNU GPL v3.0 # +# which accompanies this distribution # +# # +################################################################################## + + +#' Import a matrix of 0/1 alterations as a TRONCO compliant dataset. Input "geno" can be either a dataframe or +#' a file name. In any case the dataframe or the table stored in the file must have a column for each altered +#' gene and a rows for each sample. Colnames will be used to determine gene names, if data is loaded from +#' file the first column will be assigned as rownames. +#' +#' @title import.genotypes +#' @param geno Either a dataframe or a filename +#' @param event.type Any 1 in "geno" will be interpreted as a an observed alteration labeled with type "event.type" +#' @param color This is the color used for visualization of events labeled as of "event.type" +#' @return A TRONCO compliant dataset +#' @export import.genotypes +import.genotypes = function(geno, event.type = "variant", color = "Darkgreen") { + + if(!(is.data.frame(geno) || is.matrix(geno)) && is.character(geno)) + { + cat('*** Input "geno" is a character, interpreting it as a filename to load a table. + Required table format: + \t- one column for each gene, one row for each gene; + \t- colnames/rownames properly defined.\n') + + data = read.table(geno, + header = TRUE, + check.names = F, + stringsAsFactors = F) + + if(any(is.null(colnames(data)))) stop('Input table should have column names.') + rownames(data) = data[, 1] + data[, 1] = NULL + geno = data +} + + # Avoid malformed datasets +if (ncol(geno) == 0 || nrow(geno) == 0) + stop("Empty genotypes (number of rows/columns 0), will not import.") +nc = ncol(geno) +nr = nrow(geno) + + # Gather col/row names +if (is.null(colnames(geno))) { + cn = paste0("Gene", 1:ncol(geno)) + warning("Missing column names to identify genes. Will use labels \"Gene1\", \"Gene2\", .....") +} else cn = colnames(geno) + + # Gather col/row names +if (is.null(rownames(geno))) { + rn = paste0("Sample", 1:nrow(geno)) + warning("Missing row names to identify samples. Will use labels \"Sample1\", \"Sample2\", .....") +} else rn = rownames(geno) + +x = list() + + # Access keys - G1, G2, ... +keys = paste0("G", 1:ncol(geno)) + + # Genotype matrix +x$genotypes = as.matrix(geno) +colnames(x$genotypes) = keys +rownames(x$genotypes) = rn + + # Create attributes +x$annotations = matrix(0, nrow = nc, ncol = 2) +colnames(x$annotations) = c("type", "event") +rownames(x$annotations) = keys + +x$annotations[, "type"] = event.type +x$annotations[, "event"] = cn + + # We create a map from types to colors +x$types = matrix(color, nrow = 1, ncol = 1) +rownames(x$types) = event.type +colnames(x$types) = c("color") + +is.compliant(x, "import.genotypes: output") + +return(x) +} + + +#' Transform GISTIC scores for CNAs in a TRONCO compliant object. Input can be either a matrix, with columns +#' for each altered gene and rows for each sample; in this case colnames/rownames mut be provided. If input +#' is a character an attempt to load a table from file is performed. In this case the input table format +#' should be constitent with TCGA data for focal CNA; there should hence be: one column for each sample, +#' one row for each gene, a column Hugo_Symbol with every gene name and a column Entrez_Gene_Id with every +#' gene\'s Entrez ID. A valid GISTIC score should be any value of: "Homozygous Loss" (-2), "Heterozygous +#' Loss" (-1), "Low-level Gain" (+1), "High-level Gain" (+2). +#' +#' @examples +#' data(gistic) +#' gistic = import.GISTIC(gistic) +#' gistic = annotate.description(gistic, 'Example GISTIC') +#' oncoprint(gistic) +#' +#' @title import.GISTIC +#' @param x Either a dataframe or a filename +#' @return A TRONCO compliant representation of the input CNAs. +#' @export import.GISTIC +import.GISTIC <- function(x) { + + if(!(is.data.frame(x) || is.matrix(x)) && is.character(x)) { + cat('*** Input "x" is a character, interpreting it as a filename to load a table. + Required table format constitent with TCGA data for focal CNAs: + \t- one column for each sample, one row for each gene; + \t- a column Hugo_Symbol with every gene name; + \t- a column Entrez_Gene_Id with every gene\'s Entrez ID.\n') + + data = read.table(x, + header = TRUE, + check.names = F, + stringsAsFactors = F) + + cat('Data loaded.\n') + + if(any(is.null(colnames(data)))) { + stop('Input table should have column names.') + } + if(!'Hugo_Symbol' %in% colnames(data)) { + stop('Missing Hugo_Symbol column!') + } + if(!'Entrez_Gene_Id' %in% colnames(data)) { + stop('Missing Hugo_Symbol column!') + } + data$Entrez_Gene_Id = NULL + rownames(data) = data$Hugo_Symbol + data$Hugo_Symbol = NULL + x = t(data) + } + + cat("*** GISTIC input format conversion started.\n") + + # For next operations it is convenient to have everything as 'char' rather than 'int' + if (typeof(x[, 1]) != typeof("somechar")) { + cat("Converting input data to character for import speedup.\n") + rn = rownames(x) + x = apply(x, 2, as.character) + rownames(x) = rn + } + + if (any(is.na(x))) { + warning("NA entries were replaced with 0s.\n") + } + x[is.na(x)] = 0 + + cat("Creating ", 4 * (ncol(x)), "events for", ncol(x), "genes \n") + + # gene symbols + enames <- colnames(x) + if (is.null(enames)) { + stop("Error: gistic file has no column names and can not imported, aborting!") + } + + cat("Extracting \"Homozygous Loss\" events (GISTIC = -2) \n") + d.homo = x + d.homo[d.homo != -2] <- 0 + d.homo[d.homo == -2] <- 1 + + cat("Extracting \"Heterozygous Loss\" events (GISTIC = -1) \n") + d.het <- x + d.het[d.het != -1] <- 0 + d.het[d.het == -1] <- 1 + + cat("Extracting \"Low-level Gain\" events (GISTIC = +1) \n") + d.low <- x + d.low[d.low != 1] <- 0 + + cat("Extracting \"High-level Gain\" events (GISTIC = +2) \n") + d.high <- x + d.high[d.high != 2] <- 0 + d.high[d.high == 2] <- 1 + + cat("Transforming events in TRONCO data types ..... \n") + d.homo = trim(import.genotypes(d.homo, event.type = "Homozygous Loss", color = "dodgerblue4")) + d.het = trim(import.genotypes(d.het, event.type = "Heterozygous Loss", color = "dodgerblue1")) + d.low = trim(import.genotypes(d.low, event.type = "Low-level Gain", color = "firebrick1")) + d.high = trim(import.genotypes(d.high, event.type = "High-level Gain", color = "firebrick4")) + + d.cnv.all = ebind(d.homo, d.het, d.low, d.high) + + cat("*** Data extracted, returning only events observed in at least one sample \n", + "Number of events: n =", nevents(d.cnv.all), "\n", + "Number of genes: |G| =", ngenes(d.cnv.all), "\n", + "Number of samples: m =", nsamples(d.cnv.all), "\n") + + + is.compliant(d.cnv.all, "import.gistic: output") + return(d.cnv.all) +} + + + +#' Import mutation profiles from a Manual Annotation Format (MAF) file. All mutations are aggregated as a +#' unique event type labeled "Mutation" and assigned a color according to the default of function +#' \code{import.genotypes}. If this is a TCGA MAF file check for multiple samples per patient is performed +#' and a warning is raised if these occurr. +#' +#' @examples +#' data(maf) +#' mutations = import.MAF(maf) +#' mutations = annotate.description(mutations, 'Example MAF') +#' mutations = TCGA.shorten.barcodes(mutations) +#' oncoprint(mutations) +#' +#' @title import.MAF +#' @param file MAF filename +#' @param sep MAF separator, default \'\\t\' +#' @param is.TCGA TRUE if this MAF is from TCGA; thus its sample codenames can be interpreted +#' @return A TRONCO compliant representation of the input MAF +#' @export import.MAF +import.MAF <- function(file, sep = '\t', is.TCGA = TRUE) { + + if(!(is.data.frame(file) || is.matrix(file)) && is.character(file)) { + cat("*** Importing from file: ", file, "\n") + cat("Loading MAF file ...") + maf = read.delim(file, comment.char = "#", sep = sep, header = TRUE, stringsAsFactors = FALSE) + cat("DONE\n") + } else { + cat("*** Importing from dataframe\n") + cat("Loading MAF dataframe ...") + maf = file + cat("DONE\n") + } + + #### Auxiliary functions to extract information from the MAF file + # This is the possibly smallest type of information required to prepare a TRONCO file + # If any necessary information is missing, execution is aborted + variants = function(x) { + if (!("Variant_Classification" %in% colnames(x))) + warning("Missing Variant_Classification flag in MAF file.") + + return(unique(x[, "Variant_Classification"])) + } + + samples = function(x) { + if (!("Tumor_Sample_Barcode" %in% colnames(x))) + stop("Missing Tumor_Sample_Barcode flag in MAF file - will not import.") + + return(unique(x[, "Tumor_Sample_Barcode"])) + } + + genes = function(x) { + if (!("Hugo_Symbol" %in% colnames(x))) + stop("Missing Hugo_Symbol flag in MAF file - will not import.") + + return(unique(x[, "Hugo_Symbol"])) + } + + valid.calls = function(x) { + if (!("Validation_Status" %in% colnames(x))) + warning("Missing Validation_Status flag in MAF file.") + else return(which(x[, "Validation_Status"] == "Valid")) + } + + as.TCGA.patients = function(x) { + samples = samples(x) + patients = substr(samples, 0, 12) + + return(unique(patients)) + } + + # General report about the mutations stored in this MAF + cat("*** MAF report: ") + if (is.TCGA) { + cat("TCGA=TRUE") + } + + MAF.variants = variants(maf) + MAF.samples = samples(maf) + MAF.genes = genes(maf) + + cat("\nType of annotated mutations: \n") + print(MAF.variants) + + cat("Number of samples:", length(MAF.samples), "\n") + + # If it is TCGA you should check for multiple samples per patient + if (is.TCGA) { + TCGA.patients = as.TCGA.patients(maf) + cat("[TCGA = TRUE] Number of TCGA patients:", length(TCGA.patients), "\n") + + if (length(TCGA.patients) != length(MAF.samples)) { + warning("This MAF contains duplicate samples for some patients - use TCGA functions for further information") + } + } + + which.valid.calls = valid.calls(maf) + n.valid = length(which.valid.calls) + cat("Number of annotated mutations:", nrow(maf), "\n") + + if (("Validation_Status" %in% colnames(maf))) { + cat("Mutations annotated with \"Valid\" flag (%):", round(n.valid/nrow(maf) * 100, 0), "\n") + } else { + cat("Mutations annotated with \"Valid\" flag (%): missing flag\n") + } + cat("Number of genes (Hugo_Symbol):", length(MAF.genes), "\n") + cat("Starting conversion from MAF to 0/1 mutation profiles (1 = mutation) :") + cat(length(MAF.samples), "x", length(MAF.genes), "\n") + + flush.console() + pb <- txtProgressBar(1, nrow(maf), style = 3) + + # Temporary binary matrix + binary.mutations = matrix(0, nrow = length(MAF.samples), ncol = length(MAF.genes)) + + colnames(binary.mutations) = MAF.genes + rownames(binary.mutations) = MAF.samples + + for (i in 1:nrow(maf)) { + setTxtProgressBar(pb, i) + binary.mutations[maf$Tumor_Sample_Barcode[i], maf$Hugo_Symbol[i]] = 1 + } + close(pb) + + cat("Starting conversion from MAF to TRONCO data type.\n") + tronco.data = import.genotypes(binary.mutations, event.type = "Mutation") + is.compliant(tronco.data) + + return(tronco.data) +} + +#' Extract a map Hugo_Symbol -> Entrez_Gene_Id from a MAF input file. If some genes map to ID 0 +#' a warning is raised. +#' +#' @title extract.MAF.HuGO.Entrez.map +#' @param file MAF filename +#' @param sep MAF separator, default \'\\t\' +#' @return A mapHugo_Symbol -> Entrez_Gene_Id. +#' @export extract.MAF.HuGO.Entrez.map +extract.MAF.HuGO.Entrez.map = function(file, sep = "\t") { + cat("*** Importing from file: ", file, "\n") + cat("Loading MAF file ...") + + maf = read.delim(file, comment.char = "#", sep = sep, header = TRUE, stringsAsFactors = FALSE) + cat("DONE\n") + + map = unique(maf[, c("Hugo_Symbol", "Entrez_Gene_Id")]) + map.missing = which(map[, "Entrez_Gene_Id"] == 0) + map.missing = map[map.missing, "Hugo_Symbol", drop = FALSE] + + if (nrow(map.missing) > 0) + warning("The are Hugo_Symbol with Entrez_Gene_Id equal to 0") + else cat("Map seem consistent (non-zero Entrez_Gene_Id) for", nrow(map), "genes") + + return(map) +} + + +#' Wrapper for the CGDS package to query the Cbio portal. This can work either automatically, if one +#' sets \code{cbio.study}, \code{cbio.dataset} or \code{cbio.profile}, or interactively otherwise. A +#' list of genes to query with less than 900 entries should be provided. This function returns a list +#' with two dataframe: the gentic profile required and clinical data for the Cbio study. Output is also +#' saved to disk as Rdata file. See also http://www.cbioportal.org. +#' +#' @title cbio.query +#' @param cbio.study Cbio study ID +#' @param cbio.dataset Cbio dataset ID +#' @param cbio.profile Cbio genetic profile ID +#' @param genes A list of < 900 genes to query +#' @return A list with two dataframe: the gentic profile required and clinical data for the Cbio study. +#' @export cbio.query +#' @importFrom cgdsr CGDS getCancerStudies getCaseLists getGeneticProfiles getProfileData getClinicalData +cbio.query <- function(cbio.study = NA, cbio.dataset = NA, cbio.profile = NA, genes) { + cat("*** CGDS plugin for Cbio query.\n") + # require("cgdsr") + + if(is.null(genes) || is.na(genes) || length(genes) == 0) { + stop('Empty list of genes to query') + } + if(length(genes) > 900) { + stop('URL with more than 900 genes will not be accepted, please split it.') + } + + if (is.na(cbio.study)) { + cat("\nAutomatic CBIO study assessment: off") + }else { + cat(paste("\nAutomatic CBIO study index: ", cbio.study, sep = "")) + } + + if (is.na(cbio.dataset)) { + cat("\nAutomatic CBIO dataset assessment: off") + } else { + cat(paste("\nAutomatic CBIO dataset index: ", cbio.dataset, sep = "")) + } + if (is.na(cbio.profile)) { + cat("\nAutomatic CBIO profile assessment: off") + } else { + cat(paste("\nAutomatic CBIO profile index: ", cbio.profile, sep = "")) + } + mycgds = CGDS("http://www.cbioportal.org/public-portal/") + + + cs = getCancerStudies(mycgds) + if (is.na(cbio.study)) { + cat("\nAvailable studies at CBIO portal.\n") + print(cs[c("cancer_study_id", "name")]) + + repeat{ + cbio.study <- readline(prompt = "Enter CBIO study id: ") + if(cbio.study %in% cs$cancer_study_id) + break + } + } + + # Get available case lists (collection of samples) for a given cancer study + mycancerstudy <- cbio.study + + if (is.na(mycancerstudy)) { + stop("CBIO study id invalid. Aborting.") + } + + study <- cs[cs$cancer_study_id == cbio.study, , drop = F] + + cat(paste("\nCancer codename: ", study[, 1], sep = "")) + cat(paste("\nCancer Ref.: ", study[, 2], sep = "")) + cat(paste("\nCancer Syn.: ", study[, 3], sep = "")) + + cutdescr = function(x, n) + { + x[, ncol(x)] = ifelse( + nchar(x[, ncol(x)]) > n, + paste0(substr(x[, ncol(x)], 1, n), '....'), + x[, ncol(x)]) + return(x) + } + + # Get dataset for the study + csl = getCaseLists(mycgds, cbio.study) + if (is.na(cbio.dataset)) { + cat("\nAvailable datasets for study:", cbio.study, "\n") + print(cutdescr(csl[c("case_list_id", "case_list_description")], 90)) + + repeat{ + cbio.dataset <- readline(prompt = "Enter study dataset id: ") + if(cbio.dataset %in% csl$case_list_id) break + } + } + + caselist = csl[csl$case_list_id == cbio.dataset, , drop = F] + + if (any(is.na(caselist))) stop("No data for selected study. Aborting.") + + + cat(paste("\nData codename: ", caselist[, 1], sep = "")) + cat(paste("\nData Ref.: ", caselist[, 2], sep = "")) + cat(paste("\nData Syn.: ", caselist[, 3], sep = "")) + + # Get available genetic profiles + gp = getGeneticProfiles(mycgds, cbio.study) + if (is.na(cbio.profile)) { + cat("\nAvailable genetic profiles for selected datasets.\n") + print(cutdescr(gp[c("genetic_profile_id", "genetic_profile_description")], 90)) + + repeat{ + cbio.profile <- readline(prompt = "Enter genetic profile id: ") + if(cbio.profile %in% gp$genetic_profile_id) break + } + } + + profile = gp[gp$genetic_profile_id == cbio.profile, , drop = F] + + if (any(is.na(cbio.profile))) { + stop("No samples for this profile. Aborting") + } + + samples.name <- profile[1, 1] + samples.ref <- profile[1, 2] + samples.syn <- profile[1, 3] + samples.id <- profile[1, 4] + + cat(paste("\nSamples codename: ", samples.name, sep = "")) + cat(paste("\nData Ref.: ", samples.ref, sep = "")) + cat(paste("\nData Syn.: ", samples.syn, sep = "")) + + cat("\n\nQuerying the following list of genes: ") + cat(paste(genes, collapse = ", "), '\n') + + # Get data slices for a specified list of genes, genetic profile and case list + data <- getProfileData(mycgds, genes, samples.name, cbio.dataset) + rownames(data) = gsub('\\.', '-', rownames(data)) + cat('Symbol \".\" was replaced with "-" in sample IDs.\n') + + # Export + cat(paste("\nData retrieved: ", nrow(data), " samples, ", ncol(data), " genes.", sep = "")) + + # Get clinical data for the case list + cat("\nRetrieved also clinical data for samples:", cbio.dataset) + + clinicaldata = getClinicalData(mycgds, cbio.dataset) + rownames(clinicaldata) = gsub('\\.', '-', rownames(clinicaldata)) + + ofile <- paste(cbio.study, cbio.dataset, samples.name, "Rdata", sep = ".") + + ret = NULL + ret$profile = data + ret$clinical = clinicaldata + + save(ret, file=ofile) + + cat(paste("\nData exported to file: ", ofile, sep = "")) + write.table(data, file = ofile) + + return(ret) +} diff --git a/R/ov.cgh.R b/R/ov.cgh.R deleted file mode 100644 index e0b110c6..00000000 --- a/R/ov.cgh.R +++ /dev/null @@ -1,21 +0,0 @@ -#' @name ov.cgh -#' @title Ovarian cancer CGH data -#' @description -#' This is a data set obtained using the comparative genomic -#' hybridization technique (CGH) on sam- ples from papillary serous -#' cystadenocarcinoma of the ovary. Only the seven most commonly -#' occurring events are given. -#' @docType data -# @usage data.load("data/CGH.txt") -#' @format -#' A data frame with 87 observations on 7 variables. -#' @source \url{http://www.ncbi.nlm.nih.gov/sky/} -#' @details -#' The CGH technique uses fluorescent staining to detect abnormal -#' (increased or decreased) number of DNA copies. Often the results -#' are reported as a gain or loss on a certain arm, without further -#' distinction for specific regions. It is common to denote a change -#' in DNA copy number on a specific chromosome arm by prefixing a "-" -#' sign for decrease and a "+" for increase. Thus, say, -3q denotes -#' abnormally low DNA copy number on the q arm of the 3rd chromosome. -NULL \ No newline at end of file diff --git a/R/reset.R b/R/reset.R deleted file mode 100644 index b292a06f..00000000 --- a/R/reset.R +++ /dev/null @@ -1,63 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -#' @export reset.events -#' @export reset.types -#' @export reset -#' @name reset -#' @title reset -#' @description -#' A set of functions to reset events, types and data.values variables -#' -#' @usage reset.events() -#' @details \code{reset.events} Resets the events variable -#' @examples -#' reset.events() -reset.events <- function(){ - assign("events", NULL, envir = .GlobalEnv) -} -#' @rdname reset -#' @usage reset.types() -#' @details \code{reset.types()} Resets the types variable -#' @examples -#' reset.types() -reset.types <- function(){ - assign("types", NULL, envir = .GlobalEnv) -} -reset.data.values <- function(){ - assign("data.values", NULL, envir = .GlobalEnv) -} -#' @rdname reset -#' @usage reset() -#' @details \code{reset()} Resets types, events and data.values variables -#' @examples -#' reset() -reset <- function(){ - reset.events() - reset.types() - reset.data.values() - if(exists("num.hypotheses")) { - assign("num.hypotheses", 0, envir = .GlobalEnv) - } - if(exists("llist")) { - assign("llist", vector(), envir = .GlobalEnv) - } - if(exists("hlist")) { - assign("hlist", vector(), envir = .GlobalEnv) - } -} diff --git a/R/search.R b/R/search.R deleted file mode 100644 index 41828439..00000000 --- a/R/search.R +++ /dev/null @@ -1,54 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -# Check if there is a type with the given tipe.name in type variable. -search.type <- function(type.name){ - types <- types - for(i in 1:nrow(types)){ - if(toString(types[i,"type"]) == type.name) - return(TRUE) - } - return(FALSE) -} - -# Search type information for the given type.name. -search.type.info <- function(name.type){ - types <- types - for(j in 1:nrow(types)) - if(types[j,]$type == name.type) - return(types[j,]) -} - -# Search event informations for the given column. -search.event <- function(column.index){ - events <- events - column.index <- as.integer(column.index) - for(j in 1:nrow(events)) - if(events[j,]$column == column.index) - return(events[j,]) - stop(paste("Events definition is not complete!: events for column ", toString(column.index), " not found!", sep = ""), call.= FALSE) -} - -exists.event <- function(event.name){ - events <- events - for(j in 1:nrow(events)) - if(events[j,]$event == event.name) - return(events[j,]) - return(NULL) -} - diff --git a/R/selection.R b/R/selection.R new file mode 100644 index 00000000..45ed4a4f --- /dev/null +++ b/R/selection.R @@ -0,0 +1,200 @@ +################################################################################## +# # +# TRONCO: a tool for TRanslational ONCOlogy # +# # +################################################################################## +# Copyright (c) 2015, Marco Antoniotti, Giulio Caravagna, Luca De Sano, # +# Alex Graudenzi, Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, # +# Giancarlo Mauri, Bud Mishra and Daniele Ramazzotti. # +# # +# All rights reserved. This program and the accompanying materials # +# are made available under the terms of the GNU GPL v3.0 # +# which accompanies this distribution # +# # +################################################################################## + + + +#' select a subset of the input genotypes 'x'. Selection can be done +#' by frequency and gene symbols. +#' @title events.selection +#' +#' @examples +#' data(test_dataset) +#' dataset = events.selection(test_dataset, 0.3) +#' +#' @param x A TRONCO compliant dataset. +#' @param filter.freq [0,1] value which constriants the minimum frequence of selected events +#' @param filter.in.names gene symbols which will be included +#' @param filter.out.names gene symbols which will NOT be included +#' @return A TRONCO compliant dataset. +#' @export events.selection +events.selection = function(x, filter.freq=NA, filter.in.names=NA, filter.out.names=NA) { + + is.compliant(x, err.fun='events.selection: input') + dataset = x$genotypes + + cat(paste('*** Events selection: #events=', nevents(x), ', #types=', ntypes(x), sep='')) + + cat(paste(' Filters freq|in|out = {', + !is.na(filter.freq), ', ', + !any(is.na(filter.in.names)), ', ', + !any(is.na(filter.out.names)), '}', sep='')) + + + if(is.na(filter.out.names) && is.na(filter.in.names) && is.na(filter.freq)) { + return(x) + } + + valid = rep(FALSE, ncol(x$genotypes)) + + if(!is.na(filter.freq)) { + cat(paste('\nMinimum event frequency: ', filter.freq, ' (', + round(nsamples(x) * filter.freq, 0),' alterations out of ', nsamples(x),' samples).\n', sep='')) + x = enforce.numeric(x) + + if(!exists('hide.progress.bar') || !hide.progress.bar) { + flush.console() + pb = txtProgressBar(1, nevents(x), style = 3) + } + for(i in 1:nevents(x)) { + if(!exists('hide.progress.bar') || !hide.progress.bar) { + setTxtProgressBar(pb, i) + } + + mut.freq <- sum(x$genotypes[,i])/nsamples(x) + valid[i] <- mut.freq > filter.freq + } + if(!exists('hide.progress.bar') || !hide.progress.bar) { + close(pb) + } + + cat(paste('Selected ', nrow(as.events(x)[valid, ]), ' events.\n', sep='')) + } + + if(!any(is.na(filter.in.names))) { + shown = min(5, length(filter.in.names)) + + cat(paste('\n[filter.in] Genes hold: ', + sep='', paste(filter.in.names[1:shown], collapse=', '), ' ... ')) + + colnames = which(x$annotations[,2] %in% filter.in.names, arr.ind=T) + + k = unique(x$annotations[ + which(x$annotations[,'event'] %in% filter.in.names, arr.ind=T), 'event' + ]) + + cat(paste(' [', length(k), '/', length(filter.in.names), ' found].', sep='')) + + valid[colnames] = TRUE + } + + if(!any(is.na(filter.out.names))) { + shown = min(5, length(filter.out.names)) + + cat(paste('\n[filter.out] Genes dropped: ', + sep='', paste(filter.out.names[1:shown], collapse=', '), ' ... ')) + + colnames = which(x$annotations[,2] %in% filter.out.names, arr.ind=T) + cat(paste(' [', length(colnames), '/', length(filter.out.names), ' found].', sep='')) + + valid[colnames] = FALSE + } + + y = list() + y$genotypes = x$genotypes[, valid, drop = FALSE] + + y$annotations = as.matrix(x$annotations[valid, , drop = FALSE]) + colnames(y$annotations) = c('type', 'event') + rownames(y$annotations) = colnames(y$genotypes) + + y$types = as.matrix(x$types[unique(y$annotations[,1]), 1]) + colnames(y$types) = c('color') + rownames(y$types) = unique(y$annotations[,1]) + + if(!is.null(x$stages)) y$stages=x$stages + is.compliant(x, err.fun='events.selection: output') + + cat(paste('\nSelected ', nevents(y), ' events, returning.\n', sep='')) + + return(y) +} + +#' Return the first n recurrent events +#' @title rank.recurrents +#' +#' @examples +#' data(test_dataset) +#' dataset = rank.recurrents(test_dataset, 10) +#' +#' @param x A TRONCO compliant dataset. +#' @param n The number of events to rank +#' @return the first n recurrent events +#' @export rank.recurrents +rank.recurrents = function(x, n) +{ + is.compliant(x) + x = enforce.numeric(x) + + + if(n <= 0) { + stop('Rank value (n) should be positive.') + } + + # Sum columns + sums = colSums(x$genotypes) + + # Get the names of the first n ranked + sorted = sort(sums, decreasing = T) + + # print(sorted[1:20]) + + scores = unique(sorted) + # print(scores) + + l = length(scores) + if(n >l) { + warning(paste0('Rank contains ', l, ' unique entries, using n=', l, ' instead of n=', n)) + } + + n = min(n, length(scores)) + scores = scores[1:n] + + sorted = sorted[which(sorted >= min(scores))] + + max = names(sorted[which(sorted == max(scores))]) + min = names(sorted[which(sorted == min(scores))]) + + cat(paste0('Most recurrent(s): ', paste(as.events(x)[max, 'event'], collapse=', '), ' (', (max(scores)), ' hits).\n' )) + cat(paste0(n, '-th recurrent(s): ', paste(as.events(x)[min, 'event'], collapse=', '), ' (', (min(scores)), ' hits).\n' )) + + + order = names(sorted) + genes = as.events(x)[order, 'event'] + + return(as.vector(genes)) +} + +#' Filter a dataset based on selected samples id +#' @title samples.selection +#' +#' @examples +#' data(test_dataset) +#' dataset = samples.selection(test_dataset, c('patient 1', 'patient 2')) +#' +#' @param x A TRONCO compliant dataset. +#' @param samples A list of samples +#' @return A TRONCO compliant dataset. +#' @export samples.selection +samples.selection = function(x, samples) +{ + is.compliant(x, 'Input:') + + missing = setdiff(samples, as.samples(x)) + if(length(missing) > 0) { + warning(paste('Missing samples: ', paste(missing, collapse=', '))) + } + + delete = setdiff(as.samples(x), samples) + return(delete.samples(x, delete)) +} diff --git a/R/topology.class.R b/R/topology.class.R deleted file mode 100644 index fd51ff6c..00000000 --- a/R/topology.class.R +++ /dev/null @@ -1,74 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -#' @import methods -# The topology class definition -topology = NULL; -setClass("topology", - representation( - dataset = "data.frame", - - marginal.probs = "matrix", - joint.probs = "matrix", - cond.probs = "matrix", - - estimated.marginal.probs = "matrix", - estimated.joint.probs = "matrix", - estimated.cond.probs = "matrix", - edge.confidence = "matrix", - confidence = "list", - - pr.score = "matrix", - adj.matrix = "matrix", - adj.matrix.bic = "matrix", - - is.valid = "logical", - invalid.events = "list", - - error.fp = "numeric", - error.fn = "numeric", - bootstrap.settings = "list", - bootstrap = "logical", - - algorithm = "character")) - -# A summary is displayed if the object name of class -# topology is written in the R console. -setMethod("show", "topology", - function(object){ - tree <- c() - adj.matrix <- object@adj.matrix - names <- colnames(adj.matrix) - for(i in 1:nrow(adj.matrix)) - for(j in 1:ncol(adj.matrix)) - if(adj.matrix[i,j] == 1) - tree <- c(tree, - paste(names[i], " -> ", names[j], "\n")) - cat(" Tree progression model with ", ncol(adj.matrix), "events \n") - cat(" ") - cat(tree) - cat("\n Estimated false positives error rate:", object@error.fp) - cat("\n Estimated false negative error rate:", object@error.fn) - - if(object@bootstrap){ - cat("\n") - cat("\n Executed", object@bootstrap.settings$type, "bootstrap, nboot:", topology@bootstrap.settings$nboot) - cat("\n Confidence overall value:", object@confidence$overall.value) - cat("\n Confidence overall frequency:", object@confidence$overall.frequency) - } -}) \ No newline at end of file diff --git a/R/trim.R b/R/trim.R deleted file mode 100644 index 7bc46a17..00000000 --- a/R/trim.R +++ /dev/null @@ -1,45 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -# Resizes a string if spaces in the beginning and in the end are found -trim <- function(string){ - - dim <- as.integer(nchar(string)) - str <- substr(string,1,1) - p <- " " - - # Cuts the left blank space if at least one is found. - while((str == p)){ - string <- substr(string,2,dim) - dim <- as.integer(nchar(string)) - str <- substr(string,1,1) - } - - dim <- as.integer(nchar(string)) - str <- substr(string,dim,dim) - p <- " " - - # Cuts the right blank space if at least one is found. - while((str == p)){ - string <- substr(string,1,(dim - 1)) - dim <- as.integer(nchar(string)) - str <- substr(string,dim,dim) - } - - return(string) -} \ No newline at end of file diff --git a/R/trim.events.R b/R/trim.events.R deleted file mode 100644 index 6bf2a9b7..00000000 --- a/R/trim.events.R +++ /dev/null @@ -1,29 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -# Trim spaces in the beginning and in the end of the names in Events variable -trim.events <- function(events){ - for(i in 1:nrow(events)){ - for(j in 1:(ncol(events) - 1)){ - e <- toString(events[i,j]) - trimmed <- trim(e) - events[i,j] <- trimmed - } - } - return(events) -} \ No newline at end of file diff --git a/R/trim.types.R b/R/trim.types.R deleted file mode 100644 index 4e0319b2..00000000 --- a/R/trim.types.R +++ /dev/null @@ -1,29 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -# Trim spaces in the beginning and in the end of the names in Tyeps variable -trim.types <- function(types){ - for(i in 1:nrow(types)){ - for(j in 1:ncol(types)){ - t <- toString(types[i,j]) - trimmed <- trim(t) - types[i,j] <- trimmed - } - } - return(types) -} \ No newline at end of file diff --git a/R/tronco.R b/R/tronco.R new file mode 100644 index 00000000..80295523 --- /dev/null +++ b/R/tronco.R @@ -0,0 +1,1820 @@ +################################################################################## +# # +# TRONCO: a tool for TRanslational ONCOlogy # +# # +################################################################################## +# Copyright (c) 2015, Marco Antoniotti, Giulio Caravagna, Luca De Sano, # +# Alex Graudenzi, Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, # +# Giancarlo Mauri, Bud Mishra and Daniele Ramazzotti. # +# # +# All rights reserved. This program and the accompanying materials # +# are made available under the terms of the GNU GPL v3.0 # +# which accompanies this distribution # +# # +################################################################################## + +#' TRONCO (TRanslational ONCOlogy) is a R package which collects +#' algorithms to infer progression models from Bernoulli 0/1 profiles of genomic +#' alterations across a tumor sample. Such profiles are usually visualised as a +#' binary input matrix where each row represents a patient's sample (e.g., the +#' result of a sequenced tumor biopsy), and each column an event relevant to the +#' progression (a certain type of somatic mutation, a focal or higher-level +#' chromosomal copy number alteration etc.); a 0/1 value models the absence/presence +#' of that alteration in the sample. In this version of TRONCO such profiles can +#' be readily imported by boolean matrices and MAF/GISTIC files. The package provides +#' various functions to editing, visualise and subset such data, as well as functions +#' to query the Cbio portal for cancer genomics. This version of TRONCO comes with +#' the parallel implementations the CAPRESE [PLoS ONE 9(12): e115570] and CAPRI +#' [Bioinformatics, doi:10.1093/bioinformatics/btv296] algorithms to infer possible +#' progression models arranged as trees, or general direct acyclic graphs. +#' Bootstrap functions to assess the parametric, non-prametric and statistical +#' confidence of every inferred model are also provided. The package comes with +#' some data available as well, which include the dataset of Atypical Chronic Myeloid +#' Leukemia samples provided by Piazza et al., Nat. Genet., 45 (2013), and examples. +#' +#' @docType package +#' @name TRONCO +NULL + +#' Reconstruct a progression model using CAPRESE algorithm +#' +#' @examples +#' data(test_dataset) +#' recon = tronco.caprese(test_dataset) +#' tronco.plot(recon) +#' +#' @title tronco caprese +#' @param data A TRONCO compliant dataset. +#' @param lambda Coefficient to combine the raw estimate with a correction factor into a shrinkage estimator. +#' @param do.estimation A parameter to disable/enable the estimation of the error rates give the reconstructed model. +#' @param silent A parameter to disable/enable verbose messages. +#' @return A TRONCO compliant object with reconstructed model +#' @export tronco.caprese +#' @import doParallel +tronco.caprese <- function(data, + lambda = 0.5, + do.estimation = FALSE, + silent = FALSE ) +{ + + ############### + # DEV VERSION # + ############### + if(do.estimation) { + if(silent==FALSE) { + cat("The estimation of the error rates is not available in the current version. Disabling the estimation...") + } + do.estimation = FALSE + } + + #check for the inputs to be correct + if(is.null(data) || is.null(data$genotypes)) { + stop("The dataset given as input is not valid."); + } + if(lambda < 0 || lambda > 1) { + stop("The value of the shrinkage parameter lambda has to be in [0:1]!",call.=FALSE); + } + + # check for the input to be compliant + is.compliant(data) + + #reconstruct the reconstruction with CAPRESE + if(silent==FALSE) { + cat('*** Checking input events.\n') + invalid = consolidate.data(data, TRUE) + if(length(unlist(invalid)) > 0) warning( + "Input events should be consolidated - see consolidate.data." + ); + + cat(paste0( + '*** Inferring a progression model with the following settings.\n', + '\tDataset size: n = ', nsamples(data), ', m = ', nevents(data), '.\n', + '\tAlgorithm: CAPRESE with shrinkage coefficient: ', lambda, '.\n' + )) + } + reconstruction = caprese.fit(data$genotypes,lambda,do.estimation,silent); + + rownames(reconstruction$confidence) = c("temporal priority","probability raising","hypergeometric test"); + colnames(reconstruction$confidence) = "confidence"; + rownames(reconstruction$confidence[[1,1]]) = colnames(data$genotypes); + colnames(reconstruction$confidence[[1,1]]) = colnames(data$genotypes); + rownames(reconstruction$confidence[[2,1]]) = colnames(data$genotypes); + colnames(reconstruction$confidence[[2,1]]) = colnames(data$genotypes); + rownames(reconstruction$confidence[[3,1]]) = colnames(data$genotypes); + colnames(reconstruction$confidence[[3,1]]) = colnames(data$genotypes); + + for (i in 1:length(reconstruction$model)) { + + #set rownames and colnames to the probabilities + rownames(reconstruction$model[[i]]$probabilities$probabilities.observed$marginal.probs) = colnames(data$genotypes); + colnames(reconstruction$model[[i]]$probabilities$probabilities.observed$marginal.probs) = "marginal probability"; + rownames(reconstruction$model[[i]]$probabilities$probabilities.observed$joint.probs) = colnames(data$genotypes); + colnames(reconstruction$model[[i]]$probabilities$probabilities.observed$joint.probs) = colnames(data$genotypes); + rownames(reconstruction$model[[i]]$probabilities$probabilities.observed$conditional.probs) = colnames(data$genotypes); + colnames(reconstruction$model[[i]]$probabilities$probabilities.observed$conditional.probs) = "conditional probability"; + + #set rownames and colnames to the parents positions + rownames(reconstruction$model[[i]]$parents.pos) = colnames(data$genotypes); + colnames(reconstruction$model[[i]]$parents.pos) = "parents"; + + #set rownames and colnames to the adjacency matrices + rownames(reconstruction$model[[i]]$adj.matrix$adj.matrix.fit) = colnames(data$genotypes); + colnames(reconstruction$model[[i]]$adj.matrix$adj.matrix.fit) = colnames(data$genotypes); + + if(do.estimation==TRUE) { + rownames(reconstruction$model[[i]]$probabilities$probabilities.fit$estimated.marginal.probs) = colnames(data$genotypes); + colnames(reconstruction$model[[i]]$probabilities$probabilities.fit$estimated.marginal.probs) = "marginal probability"; + rownames(reconstruction$model[[i]]$probabilities$probabilities.fit$estimated.joint.probs) = colnames(data$genotypes); + colnames(reconstruction$model[[i]]$probabilities$probabilities.fit$estimated.joint.probs) = colnames(data$genotypes); + rownames(reconstruction$model[[i]]$probabilities$probabilities.fit$estimated.conditional.probs) = colnames(data$genotypes); + colnames(reconstruction$model[[i]]$probabilities$probabilities.fit$estimated.conditional.probs) = "conditional probability"; + } + + } + + # structure to save the results + results = data; + results$confidence = reconstruction$confidence; + results$model = reconstruction$model; + results$parameters = reconstruction$parameters; + results$execution.time = reconstruction$execution.time; + + # the reconstruction has been completed + if(!silent) cat(paste( + "The reconstruction has been successfully completed in", + format(.POSIXct(round(reconstruction$execution.time[3],digits=0),tz="GMT"),"%Hh:%Mm:%Ss"), + "\n")); + + return(results); + +} + + + +#' Reconstruct a progression model using CAPRI algorithm +#' +#' @examples +#' data(test_dataset) +#' recon = tronco.capri(test_dataset) +#' tronco.plot(recon) +#' +#' @title tronco capri +#' @param data A TRONCO compliant dataset. +#' @param command Parameter to define to heuristic search to be performed. Hill Climbing and Tabu search are currently available. +#' @param regularization Select the regularization for the likelihood estimation, e.g., BIC, AIC. +#' @param do.boot A parameter to disable/enable the estimation of the error rates give the reconstructed model. +#' @param nboot Number of bootstrap sampling (with rejection) to be performed when estimating the selective advantage scores. +#' @param pvalue Pvalue to accept/reject the valid selective advantage relations. +#' @param min.boot Minimum number of bootstrap sampling to be performed. +#' @param min.stat A parameter to disable/enable the minimum number of bootstrap sampling required besides nboot if any sampling is rejected. +#' @param boot.seed Initial seed for the bootstrap random sampling. +#' @param do.estimation A parameter to disable/enable the estimation of the error rates give the reconstructed model. +#' @param silent A parameter to disable/enable verbose messages. +#' @return A TRONCO compliant object with reconstructed model +#' @export tronco.capri +#' @importFrom bnlearn hc tabu +#' @import igraph +#' @import doParallel +tronco.capri <- function(data, + command = "hc", + regularization = c("bic","aic"), + do.boot = TRUE, + nboot = 100, + pvalue = 0.05, + min.boot = 3, + min.stat = TRUE, + boot.seed = NULL, + do.estimation = FALSE, + silent = FALSE ) +{ + + ############### + # DEV VERSION # + ############### + if(do.estimation) { + if(silent==FALSE) { + cat("The estimation of the error rates is not available in the current version. Disabling the estimation...") + } + do.estimation = FALSE + } + + #check for the inputs to be correct + if(is.null(data) || is.null(data$genotypes)) { + stop("The dataset given as input is not valid."); + } + if(is.null(data$hypotheses)) { + data$hypotheses = NA; + } + if(command != "hc" && command != "tabu") { + stop("The inference can be performed either by hill climbing or tabu search!",call.=FALSE); + } + if(pvalue < 0 || pvalue > 1) { + stop("The value of the pvalue has to be in [0:1]!",call.=FALSE); + } + + # check for the input to be compliant + is.compliant(data) + + # reconstruct the reconstruction with CAPRI + if(is.null(boot.seed)) { + my.seed = "NULL" + } + else { + my.seed = boot.seed; + } + if(silent==FALSE) { + cat('*** Checking input events.\n') + invalid = consolidate.data(data, TRUE) + if(length(unlist(invalid)) > 0) warning( + "Input events should be consolidated - see consolidate.data." + ); + + + cat(paste0( + '*** Inferring a progression model with the following settings.\n', + '\tDataset size: n = ', nsamples(data), ', m = ', nevents(data), '.\n', + '\tAlgorithm: CAPRI with \"', paste0(regularization,collapse=", "), '\" regularization and \"', command, '\" likelihood-fit strategy.\n', + '\tRandom seed: ', my.seed, '.\n', + '\tBootstrap iterations (Wilcoxon): ', ifelse(do.boot, nboot, 'disabled'), '.\n', + ifelse(do.boot, + paste0('\t\texhaustive bootstrap: ', min.stat, '.\n\t\tp-value: ', pvalue, '.\n\t\tminimum bootstrapped scores: ', min.boot, '.\n'), '') + )) + } + + reconstruction = capri.fit(data$genotypes,data$hypotheses,command=command,regularization=regularization,do.boot=do.boot,nboot=nboot,pvalue=pvalue,min.boot=min.boot,min.stat=min.stat,boot.seed=boot.seed,do.estimation=do.estimation,silent=silent); + + rownames(reconstruction$adj.matrix.prima.facie) = colnames(data$genotypes); + colnames(reconstruction$adj.matrix.prima.facie) = colnames(data$genotypes); + + rownames(reconstruction$confidence) = c("temporal priority","probability raising","hypergeometric test"); + colnames(reconstruction$confidence) = "confidence"; + rownames(reconstruction$confidence[[1,1]]) = colnames(data$genotypes); + colnames(reconstruction$confidence[[1,1]]) = colnames(data$genotypes); + rownames(reconstruction$confidence[[2,1]]) = colnames(data$genotypes); + colnames(reconstruction$confidence[[2,1]]) = colnames(data$genotypes); + rownames(reconstruction$confidence[[3,1]]) = colnames(data$genotypes); + colnames(reconstruction$confidence[[3,1]]) = colnames(data$genotypes); + + for (i in 1:length(reconstruction$model)) { + + #set rownames and colnames to the probabilities + rownames(reconstruction$model[[i]]$probabilities$probabilities.observed$marginal.probs) = colnames(data$genotypes); + colnames(reconstruction$model[[i]]$probabilities$probabilities.observed$marginal.probs) = "marginal probability"; + rownames(reconstruction$model[[i]]$probabilities$probabilities.observed$joint.probs) = colnames(data$genotypes); + colnames(reconstruction$model[[i]]$probabilities$probabilities.observed$joint.probs) = colnames(data$genotypes); + rownames(reconstruction$model[[i]]$probabilities$probabilities.observed$conditional.probs) = colnames(data$genotypes); + colnames(reconstruction$model[[i]]$probabilities$probabilities.observed$conditional.probs) = "conditional probability"; + + #set rownames and colnames to the parents positions + rownames(reconstruction$model[[i]]$parents.pos) = colnames(data$genotypes); + colnames(reconstruction$model[[i]]$parents.pos) = "parents"; + + #set rownames and colnames to the adjacency matrices + rownames(reconstruction$model[[i]]$adj.matrix$adj.matrix.pf) = colnames(data$genotypes); + colnames(reconstruction$model[[i]]$adj.matrix$adj.matrix.pf) = colnames(data$genotypes); + rownames(reconstruction$model[[i]]$adj.matrix$adj.matrix.fit) = colnames(data$genotypes); + colnames(reconstruction$model[[i]]$adj.matrix$adj.matrix.fit) = colnames(data$genotypes); + + if(do.estimation==TRUE) { + rownames(reconstruction$model[[i]]$probabilities$probabilities.fit$estimated.marginal.probs) = colnames(data$genotypes); + colnames(reconstruction$model[[i]]$probabilities$probabilities.fit$estimated.marginal.probs) = "marginal probability"; + rownames(reconstruction$model[[i]]$probabilities$probabilities.fit$estimated.joint.probs) = colnames(data$genotypes); + colnames(reconstruction$model[[i]]$probabilities$probabilities.fit$estimated.joint.probs) = colnames(data$genotypes); + rownames(reconstruction$model[[i]]$probabilities$probabilities.fit$estimated.conditional.probs) = colnames(data$genotypes); + colnames(reconstruction$model[[i]]$probabilities$probabilities.fit$estimated.conditional.probs) = "conditional probability"; + } + + } + + # structure to save the results + results = data; + results$adj.matrix.prima.facie = reconstruction$adj.matrix.prima.facie + results$confidence = reconstruction$confidence; + results$model = reconstruction$model; + results$parameters = reconstruction$parameters; + results$execution.time = reconstruction$execution.time; + + + + + + # the reconstruction has been completed + if(!silent) cat(paste( + "The reconstruction has been successfully completed in", + format(.POSIXct(round(reconstruction$execution.time[3],digits=0),tz="GMT"),"%Hh:%Mm:%Ss"), + "\n")); + + return(results); +} + +# Not exporting this function for now. +# todo +# +# @examples +# data(test_model) +# recon = tronco.estimation(test_model) +# +# @title tronco.estimation +# @param reconstruction A TRONCO compliant dataset with a reconstructed model associated. +# @param error.rates todo +# @return A TRONCO compliant object with reconstructed model and estimations +tronco.estimation <- function( reconstruction, error.rates = NA ) { +############### +# DEV VERSION # +############### + + # check for the inputs to be correct + if(is.null(reconstruction)) { + stop("A valid reconstruction has to be provided in order to estimate its confidence.",call.=FALSE); + } + + # check for the input to be compliant + is.compliant(reconstruction) + + #run the estimations for the required algorithm + if(reconstruction$parameters$algorithm=="CAPRESE") { + + cat("Executing now the estimation procedure, this may take a long time...\n") + + # if I also need to estimate the error rates + if(is.na(error.rates[1])) { + # estimate the error rates + error.rates = estimate.tree.error.rates(as.marginal.probs(reconstruction,models="caprese")[[1]],as.joint.probs(reconstruction,models="caprese")[[1]],as.parents.pos(reconstruction,models="caprese")[[1]]); + } + + # estimate the probabilities given the error rates + estimated.probabilities = estimate.tree.probs(as.marginal.probs(reconstruction,models="caprese")[[1]],as.joint.probs(reconstruction,models="caprese")[[1]],as.parents.pos(reconstruction,models="caprese")[[1]],error.rates); + + # set the estimated error rates and probabilities + probabilities.fit = list(estimated.marginal.probs=estimated.probabilities$marginal.probs,estimated.joint.probs=estimated.probabilities$joint.probs,estimated.conditional.probs=estimated.probabilities$conditional.probs); + + reconstruction$model[["caprese"]]$error.rates = error.rates + reconstruction$model[["caprese"]]$probabilities$probabilities.fit = probabilities.fit + + # set colnames and rownames + rownames(reconstruction$model[["caprese"]]$probabilities$probabilities.fit$estimated.marginal.probs) = colnames(data$genotypes); + colnames(reconstruction$model[["caprese"]]$probabilities$probabilities.fit$estimated.marginal.probs) = "marginal probability"; + rownames(reconstruction$model[["caprese"]]$probabilities$probabilities.fit$estimated.joint.probs) = colnames(data$genotypes); + colnames(reconstruction$model[["caprese"]]$probabilities$probabilities.fit$estimated.joint.probs) = colnames(data$genotypes); + rownames(reconstruction$model[["caprese"]]$probabilities$probabilities.fit$estimated.conditional.probs) = colnames(data$genotypes); + colnames(reconstruction$model[["caprese"]]$probabilities$probabilities.fit$estimated.conditional.probs) = "conditional probability"; + + } + else if(reconstruction$parameters$algorithm=="CAPRI") { + + ############### + # DEV VERSION # + ############### + stop("The estimation of the error rates is not available in the current version.") + + + cat("Executing now the estimation procedure, this may take a long time...\n") + + # go through the models + do.estimate.error.rates = FALSE; + if(is.na(error.rates[1])) { + do.estimate.error.rates = TRUE; + } + for (m in names(as.models(reconstruction))) { + + # if I also need to estimate the error rates + if(do.estimate.error.rates) { + # estimate the error rates + error.rates = estimate.dag.error.rates(reconstruction$genotypes,as.marginal.probs(reconstruction,models=m)[[1]],as.joint.probs(reconstruction,models=m)[[1]],as.parents.pos(reconstruction,models=m)[[1]]); + } + + # estimate the probabilities given the error rates + estimated.probabilities = estimate.dag.probs(reconstruction$genotypes,as.marginal.probs(reconstruction,models=m)[[1]],as.joint.probs(reconstruction,models=m)[[1]],as.parents.pos(reconstruction,models=m)[[1]],error.rates); + + # set the estimated error rates and probabilities + probabilities.fit = list(estimated.marginal.probs=estimated.probabilities$marginal.probs,estimated.joint.probs=estimated.probabilities$joint.probs,estimated.conditional.probs=estimated.probabilities$conditional.probs); + + reconstruction$model[[m]]$error.rates = error.rates + reconstruction$model[[m]]$probabilities$probabilities.fit = probabilities.fit + + # set colnames and rownames + rownames(reconstruction$model[[m]]$probabilities$probabilities.fit$estimated.marginal.probs) = colnames(data$genotypes); + colnames(reconstruction$model[[m]]$probabilities$probabilities.fit$estimated.marginal.probs) = "marginal probability"; + rownames(reconstruction$model[[m]]$probabilities$probabilities.fit$estimated.joint.probs) = colnames(data$genotypes); + colnames(reconstruction$model[[m]]$probabilities$probabilities.fit$estimated.joint.probs) = colnames(data$genotypes); + rownames(reconstruction$model[[m]]$probabilities$probabilities.fit$estimated.conditional.probs) = colnames(data$genotypes); + colnames(reconstruction$model[[m]]$probabilities$probabilities.fit$estimated.conditional.probs) = "conditional probability"; + + + } + } + else { + stop("A valid algorithm has to be provided in order to estimate its confidence.",call.=FALSE); + } + + reconstruction$parameters$do.estimation = TRUE; + return(reconstruction); + +} + + +#' Bootstrap a reconstructed progression model +#' +#' @examples +#' data(test_dataset) +#' recon = tronco.capri(test_dataset) +#' boot = tronco.bootstrap(recon, nboot=5) +#' tronco.plot(boot) +#' +#' @title tronco bootstrap +#' @param reconstruction The output of tronco.capri or tronco.caprese +#' @param type Parameter to define the type of sampling to be performed, e.g., non-parametric for uniform sampling. +#' @param nboot Number of bootstrap sampling to be performed when estimating the model confidence. +#' @param verbose Should I be verbose? +#' @return A TRONCO compliant object with reconstructed model +#' @import doParallel +#' @export tronco.bootstrap +tronco.bootstrap <- function( reconstruction, + type = "non-parametric", + nboot = 100, + verbose = FALSE) +{ + # check for the inputs to be given + if(is.null(reconstruction)) { + stop("A valid reconstruction has to be provided in order to estimate its confidence.", call. = FALSE) + } + + # check for the input to be compliant + is.compliant(reconstruction) + + ############### + # DEV VERSION # + ############### + if(type == "parametric") { + stop("The parametric bootstrap is not available in the current version. Please choose an other option...") + } + + if(reconstruction$parameters$do.estimation == FALSE && type == "parametric") { + stop("To perform parametric bootstrap, the estimation of the error rates and probabilities\nshould be performed.", call. = FALSE) + } + + if(type == "statistical" && !(reconstruction$parameters$algorithm == "CAPRI" && reconstruction$parameters$do.boot == TRUE)) { + stop("To perform statistical bootstrap, the algorithm used for the reconstruction\nmust by CAPRI with bootstrap.", call. = FALSE) + } + + # set all the needed parameters to perform the bootstrap estimation + if(type == "non-parametric" || type == "parametric" || type == "statistical") { + + dataset = reconstruction$genotypes + do.estimation = FALSE + silent = TRUE + + if(!is.null(reconstruction$bootstrap)) { + bootstrap = reconstruction$bootstrap + } + else { + bootstrap = list() + } + + if(reconstruction$parameters$algorithm == "CAPRESE") { + lambda = reconstruction$parameters$lambda + } else if(reconstruction$parameters$algorithm == "CAPRI") { + + if(!is.null(reconstruction$hypotheses)) { + hypotheses = reconstruction$hypotheses + } else { + hypotheses = NA + } + + command.capri = reconstruction$parameters$command + regularization = reconstruction$parameters$regularization + do.boot = reconstruction$parameters$do.boot + nboot.capri = reconstruction$parameters$nboot + pvalue = reconstruction$parameters$pvalue + min.boot = reconstruction$parameters$min.boot + min.stat = reconstruction$parameters$min.stat + boot.seed = reconstruction$parameters$boot.seed + if(type == 'statistical') boot.seed = NULL + } + } else { + stop("The types of bootstrap that can be performed are: non-parametric,\nparametric or statistical.", call. = FALSE) + } + + # perform the selected bootstrap procedure + cat("Executing now the bootstrap procedure, this may take a long time...\n") + + if(reconstruction$parameters$algorithm == "CAPRESE") { + + curr.boot = bootstrap.caprese(dataset, + lambda, + do.estimation, + silent, + reconstruction, + type, + nboot, + bootstrap) + + + reconstruction$bootstrap = curr.boot + + cat(paste("\nPerformed ", type, " bootstrap with ", nboot, " resampling and ", lambda, "\nas shrinkage parameter.\n\n", sep ="")) + + } else if(reconstruction$parameters$algorithm == "CAPRI") { + curr.boot = bootstrap.capri(dataset, + hypotheses, + command.capri, + regularization, + do.boot, + nboot.capri, + pvalue, + min.boot, + min.stat, + boot.seed, + do.estimation, + silent, + reconstruction, + type, + nboot, + bootstrap, + verbose) + + + reconstruction$bootstrap = curr.boot + + if(do.boot == TRUE) { + cat(paste("\nPerformed ", type, " bootstrap with ", nboot, " resampling and ", pvalue, " as pvalue \nfor the statistical tests.\n\n", sep ="")) + } else { + cat(paste("\nPerformed ", type, " bootstrap with ", nboot, " resampling.\n\n", sep ="")) + } + } + + return(reconstruction) +} + +#' Plots a progression model from a recostructed dataset +#' @title tronco.plot +#' +#' @examples +#' data(test_model) +#' tronco.plot(test_model) +#' +#' @param x A reconstructed model (the output of tronco.capri or tronco.caprese) +#' @param regularization A vector containing the names of regularizators used (BIC or AIC) +#' @param fontsize For node names. Default NA for automatic rescaling +#' @param height Proportion node height - node width. Default height 2 +#' @param width Proportion node height - node width. Default width 2 +#' @param height.logic Height of logical nodes. Defaul 1 +#' @param pf Should I print Prima Facie? Default False +#' @param disconnected Should I print disconnected nodes? Default False +#' @param scale.nodes Node scaling coefficient (based on node frequency). Default NA (autoscale) +#' @param title Title of the plot. Default as.description(x) +#' @param confidence Should I add confidence informations? No if NA +#' @param p.min p-value cutoff. Default automatic +#' @param legend Should I visualise the legend? +#' @param legend.cex CEX value for legend. Default 1.0 +#' @param edge.cex CEX value for edge labels. Default 1.0 +#' @param label.edge.size Size of edge labels. Default NA for automatic rescaling +#' @param expand Should I expand hypotheses? Default TRUE +#' @param genes Visualise only genes in this list. Default NULL, visualise all. +#' @param relations.filter Filter relations to dispaly according to this functions. Default NA +#' @param edge.color Edge color. Default 'black' +#' @param pathways.color RColorBrewer colorser for patways. Default 'Set1'. +#' @param file String containing filename for PDF output. If NA no PDF output will be provided +#' @param legend.pos Legend position. Default 'bottom', +#' @param pathways A vector containing pathways information as described in as.patterns() +#' @param lwd Edge base lwd. Default 3 +#' @param annotate.sample = List of samples to search for events in model +#' @param ... Additional arguments for RGraphviz plot function +#' @return Information about the reconstructed model +#' @export tronco.plot +#' @importFrom RColorBrewer brewer.pal.info brewer.pal +#' @import Rgraphviz +#' @import igraph +tronco.plot = function(x, + regularization = names(x$model), + fontsize = NA, + height=2, + width=3, + height.logic = 1, + pf = FALSE, + disconnected=FALSE, + scale.nodes=NA, + title = as.description(x), + confidence = NA, + p.min = x$parameters$pvalue, + legend = TRUE, + legend.cex = 1.0, + edge.cex = 1.0, + label.edge.size = NA, + expand = TRUE, + genes = NULL, + relations.filter = NA, + edge.color = 'black', + pathways.color = 'Set1', + file = NA, # print to pdf, + legend.pos = 'bottom', + pathways = NULL, + lwd = 3, + annotate.sample = NA, + ... + ) +{ + hidden.and = F + + + # Checks if reconstruction exists + if(missing(x)) { + stop("reconstruction missing, usage: hypo.plot(reconstruction, ...", call.=FALSE); + } + + logical_op = list("AND", "OR", "NOT", "XOR", "*", "UPAND", "UPOR", "UPXOR") + + + if(length(regularization) > 2) { + stop("Too many regularizators (max is 2)", call.=FALSE) + } + + if(!regularization[1] %in% names(x$model)) { + stop(paste(regularization[1], "not in model"), call.=FALSE); + } + + if(!is.na(annotate.sample) && !is.null(pathways)) + stop('Select either to annotate pathways or a sample.') + + # Annotate samples + if(!is.na(annotate.sample)) + { + if(!all(annotate.sample %in% as.samples(x))) + stop('Sample(s) to annotate are not in the dataset -- see as.samples.') + + if(npatterns(x) > 0) + nopatt.data = delete.type(x, 'Pattern') + else + nopatt.data = x + + + sample.events = Reduce(rbind, as.events.in.sample(nopatt.data, annotate.sample)) + sample.events = unique(sample.events[, 'event']) + + cat('Annotating sample', annotate.sample, 'with color red. Annotated genes:', paste(sample.events, collapse = ', '), '\n') + + pathways = list(sample.events) + names(pathways) = paste(annotate.sample, collapse = ', ') + if(nchar(names(pathways)) > 15) names(pathways) = paste0(substr(names(pathways), 1, 15), '...') + + pathways.color = 'red' + } + + sec = ifelse(length(regularization) == 2, T, F) + + if(sec && !regularization[2] %in% names(x$model)) { + stop(paste(regularization[2], "not in model"), call.=FALSE); + } + + # Models objects + primary = as.models(x, models = regularization[1])[[1]] + if(sec) + secondary = as.models(x, models = regularization[2])[[1]] + + # USARE getters adj.matrix + if (sec && !all( rownames(primary$adj.matrix$adj.matrix.fit) %in% rownames(secondary$adj.matrix$adj.matrix.fit))) { + stop("primary and secondary must have the same adj.matrix! See: the function tronco.bootstrap.", call.=FALSE) + } + + # Get the adjacency matrix - this could have been donw with getters + adj.matrix = primary$adj.matrix + if(sec) adj.matrix = secondary$adj.matrix + c_matrix = adj.matrix$adj.matrix.fit + + if(is.function(relations.filter)) + { + cat('*** Filtering relations according to function "relations.filter", visualizing:\n') + adj.matrix = as.adj.matrix(x, models = regularization) + sel.relation = as.selective.advantage.relations(x, models = regularization) + + # Select only relations which get TRUE by "relations.filter" + sel.relation = lapply(sel.relation, + function(z){ + # apply can not be used - implicit cohersion to char is crap + # z[ apply(z, 1, relations.filter), ] + #### + mask = rep(T, nrow(z)) + for(i in 1:nrow(z)) + mask[i] = relations.filter(z[i, ]) + return(z[mask, , drop = F]) + }) + + print(sel.relation) + + sel.relation = get(regularization[2], sel.relation) + + + c_matrix.names = rownames(c_matrix) + c_matrix = matrix(0, nrow = nrow(c_matrix), ncol = ncol(c_matrix)) + rownames(c_matrix) = c_matrix.names + colnames(c_matrix) = c_matrix.names + + cat(paste0('Selected relations: ', nrow(sel.relation), ' [out of ', nrow(as.selective.advantage.relations(x, models = regularization)[[2]]), ']\n')) + + if(nrow(sel.relation) > 0) { + for(i in 1:nrow(sel.relation)) { + c_matrix[ nameToKey(x, sel.relation[i, 'SELECTS']), nameToKey(x, sel.relation[i, 'SELECTED'])] = 1 + } + } + + + } + + # get the probabilities + probabilities = primary$probabilities + if(sec) { + probabilities = secondary$probabilities + } + marginal_p = probabilities$probabilities.observed$marginal.probs + + # if prima facie change the adj matrix + if (pf) { + c_matrix = adj.matrix$adj.matrix.pf + } + + if (all(c_matrix == F) || (sec && all(primary$adj.matrix$adj.matrix.fit == F))) { + warning('No edge in adjacency matrix! Nothing to show here.') + return(NULL) + } + + + # get algorithm parameters + parameters = x$parameters + + # get hypotheses + hypotheses = x$hypotheses + hstruct = NULL + if (!is.null(hypotheses) && !is.na(hypotheses) ) { + hstruct = hypotheses$hstructure + } + + # get event from genes list + events = NULL + if (is.vector(genes)) { + events = unlist(lapply(genes, function(x){names(which(as.events(x)[,'event'] == x))})) + } + + cat('*** Expanding hypotheses syntax as graph nodes:') + + # expand hypotheses + expansion = hypotheses.expansion(c_matrix, + hstruct, + hidden.and, + expand, + events) + hypo_mat = expansion[[1]] + hypos_new_name = expansion[[2]] + + cat('\n*** Rendering graphics\n') + + # remove disconnected nodes + if(!disconnected) { + cat('Nodes with no incoming/outgoing edges will not be displayed.\n') + del = which(rowSums(hypo_mat)+colSums(hypo_mat) == 0 ) + w = !(rownames(hypo_mat) %in% names(del)) + hypo_mat = hypo_mat[w,] + hypo_mat = hypo_mat[,w] + } + + + attrs = list(node = list()) + + hypo_graph = graph.adjacency(hypo_mat) + + v_names = gsub("_.*$", "", V(hypo_graph)$name) + if (!expand) { + v_names = gsub("^[*]_(.+)", "*", V(hypo_graph)$name) + } + new_name = list() + + for(v in v_names) { + if(v %in% rownames(x$annotations)) { + n = x$annotations[v,"event"] + new_name = append(new_name, n) + } else { + new_name = append(new_name, v) + } + } + + + V(hypo_graph)$label = new_name + + graph <- igraph.to.graphNEL(hypo_graph) + + node_names = V(hypo_graph)$name + + nAttrs = list() + + nAttrs$label = V(hypo_graph)$label + names(nAttrs$label) = node_names + + # set a default color + nAttrs$fillcolor = rep('White', length(node_names)) + names(nAttrs$fillcolor) = node_names + + # set fontsize + + if(is.na(fontsize)) { + fontsize = 24 - 4*log(nrow(hypo_mat)) + cat(paste0('Set automatic fontsize scaling for node labels: ', fontsize, '\n')) + } + nAttrs$fontsize = rep(fontsize, length(node_names)) + names(nAttrs$fontsize) = node_names + + # set node shape + nAttrs$shape = rep('ellipse', length(node_names)) + names(nAttrs$shape) = node_names + + # set node height + nAttrs$height = rep(height, length(node_names)) + names(nAttrs$height) = node_names + + # set node width + nAttrs$width = rep(width, length(node_names)) + names(nAttrs$width) = node_names + + + + short.label = nAttrs$label + names(short.label) = names(nAttrs$label) + if (!is.na(scale.nodes)) { + + + # foreach node + min_p = min(marginal_p) + max_p = max(marginal_p) + + for (node in node_names) { + prefix = gsub("_.*$", "", node) + if ( !(prefix %in% logical_op)) { + # Scaling ANDRE + increase_coeff = scale.nodes + (marginal_p[node,] - min_p) / (max_p - min_p) + nAttrs$width[node] = nAttrs$width[node] * increase_coeff + nAttrs$height[node] = nAttrs$height[node] * increase_coeff + nAttrs$label[node] = paste0(nAttrs$label[node], '\\\n', round(marginal_p[node, ]*100, 0), '%', ' (', sum(as.genotypes(x)[, node]) ,')') + + } + } + } + + # use colors defined in tronco$types + w = unlist(lapply(names(nAttrs$fillcolor), function(w){ + if (w %in% rownames(x$annotations)) { + x$types[x$annotations[w,'type'], 'color'] + } + else + 'White' + })) + nAttrs$fillcolor[] = w + + legend_logic = NULL + + # set color, size form and shape each logic nodes (if hypos expansion actived) + node.type = 'box' + if (expand) { + + w = unlist(nAttrs$label[names(nAttrs$fillcolor)]) == 'OR' + if (any(w)) { + legend_logic['Exclusivity (soft)'] = 'orange' + } + nAttrs$fillcolor[which(w)] = 'orange' + nAttrs$label[which(w)] = '' + nAttrs$shape[which(w)] = node.type + nAttrs$height[which(w)] = height.logic + nAttrs$width[which(w)] = height.logic + + w = unlist(nAttrs$label[names(nAttrs$fillcolor)]) == 'AND' + if (any(w)) { + legend_logic['Co-occurence'] = 'darkgreen' + } + nAttrs$fillcolor[which(w)] = 'darkgreen' + nAttrs$label[which(w)] = '' + nAttrs$shape[which(w)] = node.type + nAttrs$height[which(w)] = height.logic + nAttrs$width[which(w)] = height.logic + + w = unlist(nAttrs$label[names(nAttrs$fillcolor)]) == 'XOR' + if (any(w)) { + legend_logic['Exclusivity (hard)'] = 'red' + } + nAttrs$fillcolor[which(w)] = 'red' + nAttrs$label[which(w)] = '' + nAttrs$shape[which(w)] = node.type + nAttrs$height[which(w)] = height.logic + nAttrs$width[which(w)] = height.logic + + w = unlist(nAttrs$label[names(nAttrs$fillcolor)]) == 'UPOR' + if (any(w)) { + legend_logic['Exclusivity (soft)'] = 'orange' + } + nAttrs$fillcolor[which(w)] = 'orange' + nAttrs$label[which(w)] = '' + nAttrs$shape[which(w)] = node.type + nAttrs$height[which(w)] = height.logic + nAttrs$width[which(w)] = height.logic + + w = unlist(nAttrs$label[names(nAttrs$fillcolor)]) == 'UPAND' + if (any(w)) { + legend_logic['Co-occurence'] = 'lightgreen' + } + nAttrs$fillcolor[which(w)] = 'lightgreen' + nAttrs$label[which(w)] = '' + nAttrs$shape[which(w)] = node.type + nAttrs$height[which(w)] = height.logic + nAttrs$width[which(w)] = height.logic + + w = unlist(nAttrs$label[names(nAttrs$fillcolor)]) == 'UPXOR' + if (any(w)) { + legend_logic['Exclusivity (hard)'] = 'red' + } + nAttrs$fillcolor[which(w)] = 'red' + nAttrs$label[which(w)] = '' + nAttrs$shape[which(w)] = node.type + nAttrs$height[which(w)] = height.logic + nAttrs$width[which(w)] = height.logic + + } + + w = unlist(nAttrs$label[names(nAttrs$fillcolor)]) == '*' + if (any(w)) { + legend_logic['Co-occurence'] = 'darkgreen' + } + nAttrs$fillcolor[which(w)] = 'darkgreen' + nAttrs$label[which(w)] = '' + nAttrs$shape[which(w)] = node.type + nAttrs$height[which(w)] = height.logic + nAttrs$width[which(w)] = height.logic + + + # node border to black + nAttrs$color = rep("black", length(node_names)) + names(nAttrs$color) = node_names + + nAttrs$fontcolor = rep("black", length(node_names)) + names(nAttrs$fontcolor) = node_names + + nAttrs$lwd = rep(1, length(node_names)) + names(nAttrs$lwd) = node_names + + # set node border based on pathways information + legend_pathways = NULL + if(!is.null(pathways)) { + cat('Annotating nodes with pathway information. \n') + + if(length(pathways.color) == 1 && pathways.color %in% rownames(brewer.pal.info)) + { + cat('Annotating pathways with RColorBrewer color palette', pathways.color, '.\n') + cols = brewer.pal(n=length(names(pathways)), name=pathways.color) + } + else + { + if(length(pathways.color) != length(names(pathways))) + stop('You did not provide enough colors to annotate', length(names(pathways)), 'pathways. + Either set pathways.color to a valid RColorBrewer palette or provide the explicit correct number of colors.') + cols = pathways.color + } + + + names(cols) = names(pathways) + names(nAttrs$col) = node_names + + for(path in names(pathways)) { + n = short.label[which(short.label %in% pathways[[path]])] + nAttrs$color[unlist(names(n))] = cols[[path]] + nAttrs$fontcolor[unlist(names(n))] = cols[[path]] + + nAttrs$lwd[unlist(names(n))] = 4 + + if(length(n) > 0) { + legend_pathways[path] = cols[[path]] + } + } + } + + # edges properties + + edge_names = edgeNames(graph) + eAttrs = list() + + # set temporary edge shape + eAttrs$lty = rep("solid", length(edge_names)) + names(eAttrs$lty) = edge_names + + # set temporary fontocolor + eAttrs$fontcolor = rep("darkblue", length(edge_names)) + names(eAttrs$fontcolor) = edge_names + + #set edge thikness based on prob + eAttrs$lwd = rep(1, length(edge_names)) + names(eAttrs$lwd) = edge_names + + #set edge name based on prob + eAttrs$label = rep('', length(edge_names)) + names(eAttrs$label) = edge_names + + #set fontsize to label.edge.size (default) + if(is.na(label.edge.size)) { + label.edge.size = fontsize/2 + cat(paste0('Set automatic fontsize for edge labels: ', label.edge.size, '\n')) + } + eAttrs$fontsize = rep(label.edge.size, length(edge_names)) + names(eAttrs$fontsize) = edge_names + + #set edge color to black (default) + eAttrs$color = rep(ifelse(sec, 'darkgrey', edge.color), length(edge_names)) + names(eAttrs$color) = edge_names + + #set edge arrowsize to 1 (default) + eAttrs$arrowsize = rep(1 * edge.cex, length(edge_names)) + names(eAttrs$arrowsize) = edge_names + + #record logic edge + eAttrs$logic = rep(F, length(edge_names)) + names(eAttrs$logic) = edge_names + + if(any(!is.na(confidence))) { + cat('Adding confidence information: ') + conf = as.confidence(x, confidence) + cat(paste(paste(confidence, collapse = ', '), '\n')) + + for(e in edge_names) { + edge = unlist(strsplit(e, '~')) + + from = edge[1] + to = edge[2] + + pval.names = c('hg', 'pr', 'tp') + boot.names = c('npb', 'pb', 'sb') + red.lable = FALSE + + if(is.logic.node.up(from) || is.logic.node.down(to)) { + next + } + + if(from %in% names(hypos_new_name)){ conf_from = hypos_new_name[[from]] } else { conf_from = from } + if(to %in% names(hypos_new_name)){ conf_to = hypos_new_name[[to]] } else { conf_to = to } + + for(i in confidence) { + + conf_sel = get(i, as.confidence(x, i)) + + + if (! i %in% pval.names) + { + if (sec && primary$adj.matrix$adj.matrix.fit[conf_from, conf_to] == 0) { + conf_sel = get(regularization[[2]], conf_sel) + } else { + conf_sel = get(regularization[[1]], conf_sel) + } + } + + conf_p = conf_sel + + if(! (conf_from %in% rownames(conf_p) && conf_to %in% colnames(conf_p))) { + next + } + + if (i %in% boot.names) { + eAttrs$lwd[e] = (conf_p[conf_from, conf_to] * 5) + 1 + } + + eAttrs$label[e] = paste0( + eAttrs$label[e], + ifelse(conf_p[conf_from, conf_to] < 0.01, "< 0.01", round(conf_p[conf_from, conf_to], 2))) + + if(i %in% pval.names && conf_p[conf_from, conf_to] > p.min) { + eAttrs$fontcolor[e] = 'red' + eAttrs$label[e] = paste0(eAttrs$label[e], ' *') + } + eAttrs$label[e] = paste0(eAttrs$label[e], '\\\n') + + } + + } + cat('RGraphviz object prepared.\n') + } + + # remove arrows from logic node (hidden and) + for(e in edge_names) { + edge = unlist(strsplit(e, '~')) + from = edge[1] + to = edge[2] + + if (is.logic.node.down(to)) { + eAttrs$logic[e] = T + eAttrs$arrowsize[e] = 0 + + if(substr(to, start=1, stop=2) == 'OR') + eAttrs$color[e] = 'orange' + if(substr(to, start=1, stop=3) == 'XOR') + eAttrs$color[e] = 'red' + if(substr(to, start=1, stop=3) == 'AND') + eAttrs$color[e] = 'darkgreen' + + eAttrs$lty[e] = 'dashed' + + nAttrs$shape[to] = 'circle' + + } + + if (is.logic.node.up(from)) { + eAttrs$logic[e] = T + eAttrs$arrowsize[e] = 0 + + eAttrs$lty[e] = 'dashed' + + if(substr(from, start=1, stop=4) == 'UPOR') + eAttrs$color[e] = 'orange' + if(substr(from, start=1, stop=5) == 'UPXOR') + eAttrs$color[e] = 'red' + if(substr(from, start=1, stop=5) == 'UPAND') + eAttrs$color[e] = 'darkgreen' + + + } else if(substr(from, start=1, stop=1) == '*') { + eAttrs$logic[e] = T + eAttrs$arrowsize[e] = 0 + eAttrs$color[e] = 'black' + } + } + + + if(pf) { + cat('*** Add prima facie edges: ') + # for each edge.. + bic = adj.matrix$adj.matrix.bic + + for(e in edge_names) { + edge = unlist(strsplit(e, '~')) + from = edge[1] + old_name = hypos_new_name[[from]] + if (!is.null(old_name)) { + from = old_name + } + to = edge[2] + if (substr(to, start=1, stop=1) == '*') { + to = substr(to, start=3, stop=nchar(to)) + } + + # ..checks if edge is present in BIC + + # check if edge in BIC (valid only if not logic edge) and 'to' is not a fake and + if ( (from %in% rownames(bic)) && + (to %in% colnames(bic)) && + !eAttrs$logic[e] && + bic[from, to] == 0 + ) { + eAttrs$color[e] = 'red' + } else { + #no PF + } + } + cat('done') + } + + if (sec) { + + pri.adj = primary$adj.matrix$adj.matrix.fit + for(from in rownames(pri.adj)) { + for(to in colnames(pri.adj)) { + from.alt.name = from + if (from %in% hypos_new_name) { + from.alt.name = names(which(hypos_new_name == from)) + } + + if(pri.adj[from, to] == 1) { + eAttrs$color[paste(from.alt.name, to, sep='~')] = edge.color + } + + } + } + } + + + + cat('Plotting graph and adding legends.\n') + plot(graph, nodeAttrs=nAttrs, edgeAttrs=eAttrs, main=title, ... ) + + # Adds the legend to the plot + if (legend) { + valid_events = colnames(hypo_mat)[which(colnames(hypo_mat) %in% colnames(c_matrix))] + legend_names = unique(x$annotations[which(rownames(x$annotations) %in% valid_events), 'type']) + pt_bg = x$types[legend_names, 'color'] + legend_colors = rep('black', length(legend_names)) + pch = rep(21, length(legend_names)) + + if (length(legend_logic) > 0) { + pch = c(pch, 0, 0, rep(22, length(legend_logic))) + legend_names = c(legend_names, ' ', expression(bold('Patterns')), names(legend_logic)) + legend_colors = c(legend_colors, 'white', 'white', rep('black', length(legend_logic))) + pt_bg = c(pt_bg, 'white', 'white', legend_logic) + } + + if (length(legend_pathways) > 0) { + pch = c(pch, 0, 0, rep(21, length(legend_pathways))) + legend_names = c(legend_names, ' ', expression(bold('Pathways')), names(legend_pathways)) + pt_bg = c(pt_bg, 'white', 'white', rep('white', length(legend_pathways))) + legend_colors = c(legend_colors, 'white', 'white', legend_pathways) + } + + if (legend.pos == 'bottom') { + legend.pos.l = 'bottomleft' + legend.pos.r = 'bottomright' + } else if (legend.pos == 'top') { + legend.pos.l = 'topleft' + legend.pos.r = 'topright' + } else { + legend.pos.l = locator(1) + legend.pos.r = locator(1) + } + + legend(legend.pos.r, + legend = legend_names, + title = expression(bold('Events type')), + bty = 'n', + cex = legend.cex, + pt.cex = 1.5 * legend.cex, + pch = pch, + col = legend_colors, + pt.bg = pt_bg) + + #add thickness legend + valid_names = node_names + + if(!disconnected) { + del = which(rowSums(hypo_mat) + colSums(hypo_mat) == 0 ) + w = !(rownames(hypo_mat) %in% names(del)) + valid_names = rownames(hypo_mat[w,]) + } + + if(expand) { + valid_names = valid_names[unlist(lapply(valid_names, function(x){!is.logic.node(x)}))] + } + + valid_names = grep('^[*]_(.+)$', valid_names, value = T, invert=T) + + + freq.labels = "" + stat.pch = 0 + pt.bg = "white" + col = "white" + if (any(!is.na(confidence))) { + freq.labels = c(expression(bold('Edge confidence')), lapply(confidence, function(x){ + if(x == "hg") + return("Hypergeometric test") + if(x == "tp") + return("Temporal Priority") + if(x == "pr") + return("Probability Raising") + if(x == "pb") + return("Parametric Bootstrap") + if(x == "sb") + return("Statistical Bootstrap") + if(x == "npb") + return("Non Parametric Bootstrap") + + }), paste("p <", p.min)) + stat.pch = c(0, rep(18, length(confidence)), 0) + pt.bg = c('white', rep('white', length(confidence)), 'white') + col = c('white', rep('black', length(confidence)), 'white') + } + + + + + + if('Pattern' %in% as.types(x)) { + y = delete.model(x) + y = delete.type(y, 'Pattern') + } else + y = x + + + freq.labels = c(freq.labels, + ' ', + expression(bold('Sample size')), + paste0('n = ', nsamples(x), ', m = ', nevents(x)), + paste0('|G| = ', ngenes(y), ', |P| = ', npatterns(x)) + ) + + reg.labels = c( '\n', + expression(bold('Regularization')), + paste0(names(x$model)) + ) + + + + stat.pch = c(stat.pch, rep(0, 2), rep(20, 2), rep(0, 2), rep(20, 2)) + pt.bg = c(pt.bg, rep('white', 2), rep('black', 2), rep('white', 2), 'black', 'darkgrey') + col = c(col, rep('white', 2), rep('white', 2), rep('white', 2),'black', 'darkgrey') + + legend(legend.pos.l, + legend = c(freq.labels, reg.labels), + title = "", + bty = 'n', + box.lty = 3, + box.lwd = .3, + pch = stat.pch, + pt.cex = 1.5 * legend.cex, + ncol = 1, + pt.bg = pt.bg, + cex = legend.cex, + col = col) + + } + + + if(!is.na(file)) { + cat('Saving visualized device to file:', file) + dev.copy2pdf(file = file) + } + cat('\n') +} + + + + + + + + + + + + + +# +# +# +# +# +# TEST TEST TEST TEST +# +# +# +# +# +# +# +# +# non è esportata e non è funzionante +# le dipendenze di igraph sono da sistemare +#' @import Rgraphviz +#' @importFrom RColorBrewer brewer.pal brewer.pal.info +tronco.consensus.plot = function(models, + secondary=NULL, + fontsize=18, + MIN.HITS = 1, + height=2, + width=3, + height.logic = 1, + pf = FALSE, + disconnected=FALSE, + scale.nodes=NA, + title = paste("Consensus Progression model"), + confidence = FALSE, + legend = TRUE, + legend.cex = 1.0, + edge.cex = 1.0, + label.edge.size = 12, + hidden.and = T, + expand = TRUE, + edge.color = 'black', + pathways.color = 'Set1', + file = NA, # print to pdf, + legend.pos = 'bottom', + pathways = NULL, + ... + ) +{ + + + +# Checks if reconstruction exists + if(missing(models) || !is.list(models)) { + stop("Models missing, usage: ... ...", call.=FALSE); + } + + logical_op = list("AND", "OR", "NOT", "XOR", "*", "UPAND", "UPOR", "UPXOR") + + + smaller.to.bigger = function(m,cn) + { + x = matrix(0, nrow = length(cn), ncol = length(cn)) + rownames(x) = cn + colnames(x) = cn + + + for(i in 1:nrow(m)) + for(j in 1:nrow(m)) + x[rownames(m)[i], rownames(m)[j]] = ifelse(m[i,j] == 1, 1, 0) + return(x) + } + + +# All the adjacency matrices + matrices = list() + for(i in 1:length(models)) + matrices = append(matrices, list(models[[i]]$adj.matrix$adj.matrix.bic)) + +# All their colnames - all possible eventsand types + cn = unique(Reduce(union, lapply(matrices, colnames))) + + all.events = NULL + for(i in 1:length(models)) all.events = rbind(all.events, as.events(models[[i]]$data)) + all.events = unique(all.events) + + all.types = NULL + for(i in 1:length(models)) all.types = rbind(all.types, models[[i]]$data$types) + all.types = unique(all.types) + +# Consensus + overall adjacency matrix + consensus = Reduce('+', lapply(matrices, smaller.to.bigger, cn=cn)) + adjacency = consensus + adjacency[adjacency < MIN.HITS] = 0 + adjacency[adjacency > 1] = 1 + + cat('Consensus adjacency matrix:', nrow(adjacency), 'x', ncol(adjacency), ', minimum consensus', MIN.HITS, '\n') + +# All the marginal probabilities - entries from each input model + marginal.probabilities = matrix(nrow = nrow(adjacency), ncol = 1) + rownames(marginal.probabilities) = rownames(adjacency) + + for(i in 1:length(models)) + { + model.marginals = models[[i]]$probabilities$probabilities.bic$marginal.probs + marginal.probabilities[rownames(model.marginals),] = model.marginals + } + +# Algorithm parameters should be the same for each model + parameters = models[[1]]$parameters + +# Retrieve all hypotheses - TODO constraint this to a model with hypotheses + all.hypos = list() + for(i in 1:length(models)) + all.hypos = append(all.hypos, models[[i]]$data$hypotheses$hstructure) + +# We unpack the env and collapse it back + all.hypos = sapply(all.hypos, as.list) # make them as list + all.hypos.names = NULL + hstruct = list() + for(i in 1:length(all.hypos)) + { +# Get hypotheses missing in hstruct + local.env = all.hypos[[i]] + local.env.names = names(local.env)[which( !(names(local.env) %in% names(hstruct)) )] + + all.hypos.names = c(all.hypos.names, local.env.names) + hstruct = append(hstruct, local.env[local.env.names]) + } + + cat('Found', length(hstruct), 'hypotheses\n') + hstruct = as.environment(hstruct) + +# hypotheses.expansion requires hypotheses to be stored as rightmost columns + adjacency = adjacency[, c(setdiff(colnames(adjacency), all.hypos.names), all.hypos.names)] + adjacency = adjacency[c(setdiff(colnames(adjacency), all.hypos.names), all.hypos.names), ] + + consensus = consensus[, c(setdiff(colnames(consensus), all.hypos.names), all.hypos.names)] + consensus = consensus[c(setdiff(colnames(consensus), all.hypos.names), all.hypos.names), ] + + all.events = all.events[colnames(adjacency), ] + +# Expand hypotheses + expansion = hypotheses.expansion(adjacency, + map = hstruct, + hidden_and = hidden.and, + expand = expand) + hypo_mat = expansion[[1]] + hypos_new_name = expansion[[2]] + +# Remove disconnected nodes + if(!disconnected) { + del = which(rowSums(hypo_mat)+colSums(hypo_mat) == 0 ) + w = !(rownames(hypo_mat) %in% names(del)) + hypo_mat = hypo_mat[w,] + hypo_mat = hypo_mat[,w] + } + + cat('\n*** Render graphics: ') + + attrs = list(node = list()) + + hypo_graph = graph.adjacency(hypo_mat) + + v_names = gsub("_.*$", "", V(hypo_graph)$name) + if (!expand) { + v_names = gsub("^[*]_(.+)", "*", V(hypo_graph)$name) + } + + new_name = list() + for(v in v_names) { + if(v %in% rownames(all.events)) { + n = all.events[v,"event"] + new_name = append(new_name, n) + } else { + new_name = append(new_name, v) + } + } + + V(hypo_graph)$label = new_name + graph <- igraph.to.graphNEL(hypo_graph) + + node_names = V(hypo_graph)$label + nAttrs = list() + + nAttrs$label = V(hypo_graph)$label + names(nAttrs$label) = node_names + +# set a default color + nAttrs$fillcolor = rep('White', length(node_names)) + names(nAttrs$fillcolor) = node_names + +# set fontsize + nAttrs$fontsize = rep(fontsize, length(node_names)) + names(nAttrs$fontsize) = node_names + +# set node shape + nAttrs$shape = rep('ellipse', length(node_names)) + names(nAttrs$shape) = node_names + +# set node height + nAttrs$height = rep(height, length(node_names)) + names(nAttrs$height) = node_names + +# set node width + nAttrs$width = rep(width, length(node_names)) + names(nAttrs$width) = node_names + + if (!is.na(scale.nodes)) { + + min_p = min(marginal.probabilities) + max_p = max(marginal.probabilities) + + for (node in node_names) { + prefix = gsub("_.*$", "", node) + if ( !(prefix %in% logical_op)) { +# Scaling ANDRE + increase_coeff = scale.nodes + (marginal.probabilities[node,] - min_p) / (max_p - min_p) + nAttrs$width[node] = nAttrs$width[node] * increase_coeff + nAttrs$height[node] = nAttrs$height[node] * increase_coeff + } + } + } + +# use colors defined in tronco$types + w = unlist(lapply(names(nAttrs$fillcolor), function(x){ + if (x %in% rownames(all.events)) + all.types[all.events[x,'type'], 'color'] + else + 'White' + })) + nAttrs$fillcolor[] = w + + + legend_logic = NULL + +# set color, size form and shape each logic nodes (if hypos expansion actived) + node.type = 'box' + if (expand) { + + w = unlist(nAttrs$label[names(nAttrs$fillcolor)]) == 'OR' + if (any(w)) { + legend_logic['Exclusivity (soft)'] = 'orange' + } + nAttrs$fillcolor[which(w)] = 'orange' + nAttrs$label[which(w)] = '' + nAttrs$shape[which(w)] = node.type + nAttrs$height[which(w)] = height.logic + nAttrs$width[which(w)] = height.logic + + w = unlist(nAttrs$label[names(nAttrs$fillcolor)]) == 'AND' + if (any(w)) { + legend_logic['Co-occurence'] = 'darkgreen' + } + nAttrs$fillcolor[which(w)] = 'darkgreen' + nAttrs$label[which(w)] = '' + nAttrs$shape[which(w)] = node.type + nAttrs$height[which(w)] = height.logic + nAttrs$width[which(w)] = height.logic + + w = unlist(nAttrs$label[names(nAttrs$fillcolor)]) == 'XOR' + if (any(w)) { + legend_logic['Exclusivity (hard)'] = 'red' + } + nAttrs$fillcolor[which(w)] = 'red' + nAttrs$label[which(w)] = '' + nAttrs$shape[which(w)] = node.type + nAttrs$height[which(w)] = height.logic + nAttrs$width[which(w)] = height.logic + + w = unlist(nAttrs$label[names(nAttrs$fillcolor)]) == 'UPOR' + if (any(w)) { + legend_logic['Exclusivity (soft)'] = 'orange' + } + nAttrs$fillcolor[which(w)] = 'orange' + nAttrs$label[which(w)] = '' + nAttrs$shape[which(w)] = node.type + nAttrs$height[which(w)] = height.logic + nAttrs$width[which(w)] = height.logic + + w = unlist(nAttrs$label[names(nAttrs$fillcolor)]) == 'UPAND' + if (any(w)) { + legend_logic['Co-occurence'] = 'lightgreen' + } + nAttrs$fillcolor[which(w)] = 'lightgreen' + nAttrs$label[which(w)] = '' + nAttrs$shape[which(w)] = node.type + nAttrs$height[which(w)] = height.logic + nAttrs$width[which(w)] = height.logic + + w = unlist(nAttrs$label[names(nAttrs$fillcolor)]) == 'UPXOR' + if (any(w)) { + legend_logic['Exclusivity (hard)'] = 'red' + } + nAttrs$fillcolor[which(w)] = 'red' + nAttrs$label[which(w)] = '' + nAttrs$shape[which(w)] = node.type + nAttrs$height[which(w)] = height.logic + nAttrs$width[which(w)] = height.logic + + } +#print(legend_logic) + + w = unlist(nAttrs$label[names(nAttrs$fillcolor)]) == '*' + if (any(w)) { + legend_logic['Co-occurence'] = 'lightgreen' + } + nAttrs$fillcolor[which(w)] = 'lightgreen' + nAttrs$label[which(w)] = '' + nAttrs$shape[which(w)] = node.type + nAttrs$height[which(w)] = height.logic + nAttrs$width[which(w)] = height.logic + +# node border to black + nAttrs$color = rep("black", length(node_names)) + names(nAttrs$color) = node_names + + nAttrs$fontcolor = rep("black", length(node_names)) + names(nAttrs$fontcolor) = node_names + +# set node border based on pathways information + legend_pathways = NULL + if(!is.null(pathways)) { + + + if(length(pathways.color) == 1 && pathways.color %in% rownames(brewer.pal.info)) + { + cat('Annotating pathways with RColorBrewer color palette', pathways.color, '.\n') + cols = brewer.pal(n=length(names(pathways)), name=pathways.color) + } + else + { + if(length(pathways.color) != length(names(pathways))) + stop('You did not provide enough colors to annotate', length(names(pathways)), 'pathways. + Either set pathways.color to a valid RColorBrewer palette or provide the explicit correct number of colors.') + cols = pathways.color + } + + names(cols) = names(pathways) + names(nAttrs$col) = node_names + + + for(path in names(pathways)) { + n = nAttrs$label[which(nAttrs$label %in% pathways[[path]])] + nAttrs$color[unlist(names(n))] = cols[[path]] + nAttrs$fontcolor[unlist(names(n))] = cols[[path]] + if(length(n) > 0) { + legend_pathways[path] = cols[[path]] + } + } + + } + +# edges properties + + edge_names = edgeNames(graph) + eAttrs = list() + +# set temporary edge shape + eAttrs$lty = rep("solid", length(edge_names)) + names(eAttrs$lty) = edge_names + +#set edge thikness based on prob + eAttrs$lwd = rep(1, length(edge_names)) + names(eAttrs$lwd) = edge_names + +#set edge name based on prob + eAttrs$label = rep('', length(edge_names)) + names(eAttrs$label) = edge_names + +#set fontsize to label.edge.size (default) + eAttrs$fontsize = rep(label.edge.size, length(edge_names)) + names(eAttrs$fontsize) = edge_names + +#set edge color to black (default) + eAttrs$color = rep(edge.color, length(edge_names)) + names(eAttrs$color) = edge_names + +#set edge arrowsize to 1 (default) + eAttrs$arrowsize = rep(1 * edge.cex, length(edge_names)) + names(eAttrs$arrowsize) = edge_names + +#record logic edge + eAttrs$logic = rep(F, length(edge_names)) + names(eAttrs$logic) = edge_names + + +# for each edge.. + for(e in edge_names) { + edge = unlist(strsplit(e, '~')) + from = edge[1] + to = edge[2] + + + if(from %in% names(hypos_new_name)) + { + old.name = hypos_new_name[from] + idx.from = which(rownames(consensus) == old.name) + from = idx.from + + } + + if(to %in% names(hypos_new_name)) + { + old.name = hypos_new_name[to] + idx.to = which(colnames(consensus) == old.name) + to = idx.to + } + + if(!all(c(from,to) %in% colnames(consensus))) + { + eAttrs$lwd[e] = MIN.HITS + eAttrs$label[e] = paste0('', MIN.HITS) + } + else + { + eAttrs$lwd[e] = consensus[from, to] + eAttrs$label[e] = paste0('', consensus[from, to]) + } + + } + +# remove arrows from logic node (hidden and) + for(e in edge_names) { + edge = unlist(strsplit(e, '~')) + from = edge[1] + to = edge[2] + + if (is.logic.node.down(to)) { + eAttrs$logic[e] = T + eAttrs$arrowsize[e] = 0 + + if(substr(to, start=1, stop=2) == 'OR') + eAttrs$color[e] = 'orange' + if(substr(to, start=1, stop=3) == 'XOR') + eAttrs$color[e] = 'red' + if(substr(to, start=1, stop=3) == 'AND') + eAttrs$color[e] = 'darkgreen' + + eAttrs$lty[e] = 'dashed' + + nAttrs$shape[to] = 'circle' + + } + + if (is.logic.node.up(from)) { + eAttrs$logic[e] = T + eAttrs$arrowsize[e] = 0 +# eAttrs$color[e] = 'black' + + eAttrs$lty[e] = 'dashed' + + + if(substr(from, start=1, stop=4) == 'UPOR') + eAttrs$color[e] = 'orange' + if(substr(from, start=1, stop=5) == 'UPXOR') + eAttrs$color[e] = 'red' + if(substr(from, start=1, stop=5) == 'UPAND') + eAttrs$color[e] = 'darkgreen' + + + } else if(substr(from, start=1, stop=1) == '*') { + eAttrs$logic[e] = T + eAttrs$arrowsize[e] = 0 + eAttrs$color[e] = 'black' + } + } + + plot(graph, nodeAttrs=nAttrs, edgeAttrs=eAttrs, main=title, ... ) + + + + if(!is.na(file)) + { + dev.copy2pdf(file = file) + } + + } + + diff --git a/R/tronco.bootstrap.R b/R/tronco.bootstrap.R deleted file mode 100644 index 45e56a07..00000000 --- a/R/tronco.bootstrap.R +++ /dev/null @@ -1,89 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -#' @import lattice -#' @export tronco.bootstrap -#' @title perform bootstrap algorithm -#' -#' @description -#' \code{tronco.bootstrap} perform parametric and non-parametric bootstrap algorithms -#' -#' @param topology A topology returned by a reconstruction algorithm -#' @param lambda A lambda value, default is 0.5 -#' @param type The type of bootstrap performed, parametric and non parametric types are available. -#' To specify wich type of bootstrap run type must be "parametric" or "non-parametric". -#' @param nboot Samplig value. The grater will be the nboot value the logehr time the -#' entire process will take to complete the computing -#' @return -#' A topology object with bootstrap informations added -#' -tronco.bootstrap <- function(topology, lambda = 0.5, type = "non-parametric", nboot = 1000){ - show.level <- function(topology){ - print(topology@edge.confidence) - levelplot(topology@edge.confidence, xlab = "", ylab = "", - scales = list(x = list(alternating = 2, rot = 90), tck = 0), - main = paste("Edge confidence (", topology@bootstrap.settings$type, " bootstrap)",sep = "")) - } - - if(missing(topology)) - stop("Missing parameter for tronco.bootstrap function: tronco.bootstrap(topology, lambda, type, nboot)", call. = FALSE) - # If the given topology is valid and contains a valid dataset and other informations. - if(topology@is.valid){ - # If all numeric parameters are well formed. - if(nboot > 0){ - if(lambda > 0 && lambda < 1){ - if(type == "non-parametric" || type == "parametric"){ - - cat("Executing bootstrap algorithm this may take several time...\n") - - # The error rates are turned into the format required by bootstrap.caprese function. - error.rates <- list() - error.rates$error.fp <- topology@error.fp - error.rates$error.fn <- topology@error.fn - - # The bootstrap function of the caprese algorithm is performed. - boot.info <- bootstrap.caprese(topology@dataset, lambda, topology@adj.matrix, type, topology@estimated.marginal.probs, topology@estimated.cond.probs, error.rates, nboot) - - # Confidence informations are set into the topology object. - topology@edge.confidence <- boot.info$edge.confidence - topology@confidence <- boot.info$confidence - topology@bootstrap.settings <- boot.info$bootstrap.settings - topology@bootstrap <- TRUE - - cat(paste("Executed ", type, " bootsrap with ", nboot, - " as sampling number and ", lambda, " as lambda value\n\n", sep ="")) - - print(topology@edge.confidence) - cat("\nConfidence overall value:", boot.info$confidence$overall.value) - cat("\nConfidence overall frequency:", boot.info$confidence$overall.frequency) - - } - else - stop("Valid type of bootstrap are: non-parametric or parametric, check your typing!", call. = FALSE) - } - else - stop("Lambda must be in [0:1]!", call. = FALSE) - } - else - stop("Bootstrap number bust be greater than zero!", call. = FALSE) - } - else - stop("Topology object does not contain a valid CAPRESE reconstruction!", call. = FALSE) - - return(topology) -} \ No newline at end of file diff --git a/R/tronco.bootstrap.show.R b/R/tronco.bootstrap.show.R deleted file mode 100644 index 54a5cf72..00000000 --- a/R/tronco.bootstrap.show.R +++ /dev/null @@ -1,37 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -library(lattice) -#' @import lattice -#' @export tronco.bootstrap.show -#' @title show bootstrapping results -#' -#' @description -#' \code{tronco.bootstrap.show} show bootstrapping results. Requires that you already executed tronco.bootstrap -#' -#' @param topology A topology returned by a reconstruction algorithm -tronco.bootstrap.show <- function(topology){ - if(missing(topology)) - stop("Missing parameter for tronco.bootstrap.show function: tronco.bootstrap.show(topology)", call. = FALSE) - if(!topology@bootstrap) - stop("To show confidence information bootstrap execution is needed! see: tronco.bootstrap function!", call. = FALSE) - print(topology@edge.confidence) - levelplot(topology@edge.confidence, xlab = "", ylab = "", - scales = list(x = list(alternating = 2, rot = 90), tck = 0), - main = paste("Edge confidence (", topology@bootstrap.settings$type, " bootstrap)",sep = "")) -} \ No newline at end of file diff --git a/R/tronco.caprese.R b/R/tronco.caprese.R deleted file mode 100644 index b0eb3118..00000000 --- a/R/tronco.caprese.R +++ /dev/null @@ -1,115 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -#' @export tronco.caprese -#' @title runs CAPRESE algorithm -#' -#' @description -#' \code{tronco.caprese} executes the CAPRESE algorithm on the dataset \code{data.values} specified. -#' -#' @details -#' \code{tronco.caprese} executes the reconstruction of the topology, and computesg all the confidence measures defined in \code{confidence}. -#' -#' @param dataset The input dataset. Type: dataframe. The dataset given as input is the data.values data frame loaded by the \code{data} function. -#' @seealso \code{\link{data}} -#' @param lambda the real positive value of the shrinkage coefficient, required to range in [0, 1]. Its default value is 0.5, if unspecified. -#' @param verbose execute CAPRESE algorithm with verbose output to screen. Type: boolean, dafault: FALSE. -#' @return an object containing the reconstructed topology and confidence values. -#' -tronco.caprese <- function(dataset, lambda = 0.5, verbose = FALSE){ - - if(missing(dataset)) - stop("Missing parameter for tronco.caprese function: tronco.caprese(dataset, lambda, verbose)", call. = FALSE) - if(lambda < 0 || lambda > 1) - stop("Lambda coefficient must be in [0:1]!", call. = FALSE) - else{ - if(is.null(dataset)) - stop("Empty dataset!", call. = FALSE) - - # Load CAPRESE algorithm - sapply(list.files(pattern="[.]R$", path="CAPRESE", full.names=TRUE), source) - - # Reconstruct the topology - topology <- caprese.fit(dataset, lambda, verbose) - - - if(!exists("invalid.events")) - stop("Invalid.events collection not fount! use data.load() function to load a dataset and looks for invalid events...", call = FALSE) - invalid.events <- invalid.events - # Collects info for the object of class topology. - info <- infoset(invalid.events$merged.events, invalid.events$removed.events) - - rownames(topology$adj.matrix) <- info$all.labels - colnames(topology$adj.matrix) <- info$all.labels - - rownames(topology$pr.score) <- info$all.labels - colnames(topology$pr.score) <- info$all.labels - - # Probability Labels assignment - rownames(topology$probabilities$conditional.probs) <- info$all.labels - rownames(topology$probabilities$marginal.probs) <- info$all.labels - - rownames(topology$probabilities$joint.probs) <- info$all.labels - colnames(topology$probabilities$joint.probs) <- info$all.labels - - # Estimated probability labels assignment - rownames(topology$probabilities$estimated.conditional.probs) <- info$all.labels - rownames(topology$probabilities$estimated.marginal.probs) <- info$all.labels - - rownames(topology$probabilities$estimated.joint.probs) <- info$all.labels - colnames(topology$probabilities$estimated.joint.probs) <- info$all.labels - - - topology.obj <- new("topology", - dataset = topology$dataset, - - marginal.probs = topology$probabilities$marginal.probs, - joint.probs = topology$probabilities$joint.probs, - cond.probs = topology$probabilities$conditional.probs, - - estimated.marginal.probs = topology$probabilities$estimated.marginal.probs, - estimated.joint.probs = topology$probabilities$estimated.joint.probs, - estimated.cond.probs = topology$probabilities$estimated.conditional.probs, - - edge.confidence = matrix(), - confidence = list(), - bootstrap.settings = list(), - bootstrap = FALSE, - - pr.score = topology$pr.score, - adj.matrix = topology$adj.matrix, - adj.matrix.bic = matrix(), - - # The parameter is fixed because of the last code changes ;) - is.valid = TRUE, - invalid.events = invalid.events, - - error.fp = topology$error.rates$error.fp, - error.fn = topology$error.rates$error.fn, - algorithm = "CAPRESE") - - cat(paste("Executed CAPRESE algorithm with shrinkage coefficient:", lambda,"\n")) - cat(" Estimated false positives error rate: ", topology.obj@error.fp) - cat("\n Estimated false negative error rate: ", topology.obj@error.fn) - - return(topology.obj) - - - } - -} \ No newline at end of file diff --git a/R/tronco.plot.R b/R/tronco.plot.R deleted file mode 100644 index 38c6a473..00000000 --- a/R/tronco.plot.R +++ /dev/null @@ -1,254 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -#' @import Rgraphviz -#' @import graph -#' @export tronco.plot -#' @title plot a progression model -#' -#' @description -#' \code{tronco.plot} plots a progression model from a recostructed \code{topology}. -#' -#' -#' @param topology A topology returned by a reconstruction algorithm -#' @param title plot Plot title (default "Progression model x", x reconstruction algorithm) -#' @param title.color color title (default "black") -#' -#' @param legend bool; show/hide the legend (default is t) -#' @param legend.pos string; legend positioning, available keywords "topleft", "topright", -#' "bottom- left" and "bottomright" (default is "bottomright") -#' @param legend.title string; legend title (default is "Legend") -#' -#' @param legend.columns int; use 1 or 2 columns to plot the legend (default is 1) -#' @param legend.inline bool; print inline legend (default is f) -#' @param legend.coeff double; size of the types label in the legend (default is 1) -#' -#' @param label.coeff double; size of the events label (default is 1) -#' @param label.color color events label (default "black") -#' @param label.edge.size double; size of the confidence label, when used (default is 12) -#' -#' @param confidence bool; plot edges according to confidence (default is f) -#' @examples -#' \dontrun{ -#' types.load("data/types.txt") -#' events.load("data/events.txt") -#' data.load("data/CGH.txt") -#' topology <- tronco.caprese(data.values) -#' tronco.plot(topology, legend.pos = "topleft", legend = TRUE, confidence = TRUE, -#' legend.col = 1, legend.coeff = 0.7, label.edge.size = 10, label.coeff = 0.7) -#' } -tronco.plot <- function(topology, title = paste("Progression model", topology@algorithm, sep = " "), - title.color = "black", confidence = FALSE, legend = TRUE, legend.title = "Legend", - legend.columns = 1, legend.inline = FALSE, legend.pos = "bottomright", legend.coeff = 1, label.coeff = 1, - label.color = "black", label.edge.size = 12){ - primafacie = FALSE - ratio = FALSE - size = FALSE - if(missing(topology)) - stop("Missing parameter for tronco.plot function: tronco.plot(topology, ...", call. = FALSE) - if(exists("events") && exists("types") && (length(types) > 0) && (length(events) > 0)){ - - events <- events - types <- types - invalid.events <- invalid.events - - # Collects info for the graph building, such as colors and labels the nodes. - info <- infoset(invalid.events$merged.events, invalid.events$removed.events) - - colors <- info$colors - n.names <- info$all.vis.labels - - # Build the graph object from the adjacency matrix - if(topology@algorithm == "CAPRESE"){ - adj.matrix <- topology@adj.matrix - g <- graphAM(adjMat=adj.matrix, edgemode="directed") - } - else if(topology@algorithm == "CAPRI"){ - # Creates a graph object using "prima.facie" adj.matrix - adj.matrix <- topology@adj.matrix - g <- graphAM(adjMat=adj.matrix, edgemode="directed") - # Creates a graph object using the "bic" adj.matrix - adj.matrix <- topology@adj.matrix.bic - g1 <- graphAM(adjMat=adj.matrix, edgemode="directed") - } - - # If confidece in requested but bootstrap is not performed an error is displayed - if(confidence && !topology@bootstrap) - stop("To show confidence information bootstrap execution is needed! see: tronco.bootstrap function!", call. = FALSE) - - # Build a list of edges and their thickness - edw <- c() - names <- edgeNames(g) - edge.style <- c() - if(topology@algorithm == "CAPRESE" && topology@bootstrap){ - - low.conficence.edges <- FALSE - - for(i in 1:nrow(adj.matrix)) - for(j in 1:ncol(adj.matrix)) - if(adj.matrix[i,j] == 1){ - edw <- c(edw, topology@edge.confidence[i,j]) - } - - # To let tronco.plot draw edges with a grayscale palette call - # grayscale.color(g,edw) insted of the rep function - # to assign colors to edge.color variable - edge.color <- rep("black", length(edgeNames(g))) - edge.style <- rep("solid", length(edgeNames(g))) - ed.name <- edw - edw.notk <- edw - - # Thickness of edges start form 1 but all bootstrap values are - # lower than one, so a proprional factor is set. - # An extra 1.5 factor is been required by the thinner edges - # to be correctly shown - edw <- edw * 8 - - low.confidence.edges <- FALSE - ed.label <- lapply(ed.name, toString) - for(i in 1:length(ed.label)){ - if(edw.notk[i] == 1) - ed.label[i] <- substr(ed.label[i],1,4) - else if(edw.notk[i] == 0){ - ed.label[i] <- ".0" - edge.style[i] <- "dashed" - edw[i] <- 1 - low.confidence.edges <- TRUE - } - else - ed.label[i] <- substr(ed.label[i],2,4) - - } - ed.label <- paste(" ", ed.label, sep="") - - names(edw) <- names - names(ed.name) <- names - names(ed.label) <- edgeNames(g) - names(edge.style) <- edgeNames(g) - - if(low.confidence.edges) - cat("Edges with confidence zero will be displayed with dashed edges.\n") - - } - else - edge.color <- rep("black", length(edgeNames(g))) - - # Set a parameter to proportionally set the size of node labels - max.node.label.len <- max(nchar(colnames(topology@adj.matrix))) - mean.node.label.len <- mean(nchar(colnames(topology@adj.matrix))) - label.node.length <- mean.node.label.len*label.coeff/max.node.label.len - - # Sets parameters for each edge or node - shape <- rep("ellipse", nrow(adj.matrix)) - arrows <- rep("open", length(edgeNames(g))) - textColor <- rep(label.color, nrow(adj.matrix)) - label.edge.size <- rep(label.edge.size, length(edgeNames(g))) - - - if(any(types$color == label.color)) - warning("Label with same nodes background color!", call. = FALSE) - - # Sets names of each pamater - names(colors) <- colnames(topology@adj.matrix) - names(shape) <- colnames(topology@adj.matrix) - names(arrows) <- edgeNames(g) - names(n.names) <- colnames(topology@adj.matrix) - names(textColor) <- colnames(topology@adj.matrix) - names(edge.color) <- edgeNames(g) - names(label.edge.size) <- edgeNames(g) - - - if(topology@algorithm == "CAPRESE" && confidence){ - edgeRenderInfo(g) <- list(lwd = edw, fontsize = label.edge.size, lty = edge.style) - eAttrs = list(label = ed.label, arrowhead = arrows, color = edge.color) - } - else - eAttrs = list() - - if(topology@algorithm == "CAPRI" && primafacie){ - edge.color <- rep("red", length(edgeNames(g))) - names(edge.color) <- edgeNames(g) - for(i in 1:length(edgeNames(g))){ - if(any(edgeNames(g1) == names(edge.color[i]))) - edge.color[i] <- "black" - } - eAttrs = list(arrowhead = arrows, color = edge.color) - - }else if(topology@algorithm == "CAPRI" && !primafacie) - g <- g1 - - - # Graph building section. - # Different type of parameters are assigned to the plot in differents ways. - # Please check out all the "RGraphviz" manuals on Bioconductor page and - # consider the graph drawing documentation on CRAN - - nodeRenderInfo(g) <- list(cex = label.node.length, textCol = textColor) - - graph.par(list(graph = list(main = title, col.main = title.color))) - - nAttrs = list(shape = shape, fillcolor = colors, color = colors, label = n.names) - - if(ratio == FALSE && size == FALSE) - attrs = list() - else - attrs = list(graph = list(ratio = ratio, size = size)) - - g <- layoutGraph(g, nodeAttrs = nAttrs, edgeAttrs = eAttrs, attrs = attrs) - - #Elimino i bordi attorno al plot - mar <- par("mar") - par("mar" = c(1, 1, 1, 1) + 0.1) - - # Once the graph is ready to be displayed the renderGraph function does the job. - renderGraph(g) - - if(legend){ - - legend.names <- types$type - col <- types$color - - if(legend.inline) - legend.columns <- length(legend.names) - - if(legend.columns > length(legend.names)) - stop("Legend must have at the most columns equal to the number of types!") - - # Adds the legend to the plot. - legend(legend.pos, - legend = legend.names, - title = legend.title, - bty = 'n', - cex = legend.coeff, - pch = c(19,19), - ncol = legend.columns, - pt.cex = 3*legend.coeff, - col = col, - xjust = 1, - xpd = TRUE, - y.intersp = 1.7, - x.intersp = 1.2) - - } - - par("mar" = mar) - cat("Plot created successfully\n") - } - else - stop("types, events or topology variable not found") -} \ No newline at end of file diff --git a/R/types.R b/R/types.R deleted file mode 100644 index 1b0dd4a4..00000000 --- a/R/types.R +++ /dev/null @@ -1,10 +0,0 @@ -#' @name types -#' @title Types collection for Ovarian cancer CGH data -#' @description -#' This example contains a collection of types associeted to the -#' Ovarian cancer CGH dataset -#' @docType data -#@usage events.load("data/events.txt") -#' @format -#' An example with 2 types -NULL \ No newline at end of file diff --git a/R/types.add.R b/R/types.add.R deleted file mode 100644 index 63784ad4..00000000 --- a/R/types.add.R +++ /dev/null @@ -1,61 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -#' @export types.add -#' @title add a new type of event (e.g., missense point mutation) -#' -#' @description -#' \code{types.add} sets a global data frame 'types' that contains all types defined. Types can be added and refined incrementally, in any order. -#' -#' @details -#' \code{types.add} defines a type of event considered at a time. If the type was previously defined, its definition is updated to keep track of its last definition. A consistency check is performed to ensure that the type is valid. Types must be defined before events are loaded. -#' -#' @param type.name The type label. All type labels are strings. -#' @param color.name The type color. All R's color definitions are allowed. -#' @examples -#' types.add("gain", "red") -#' -types.add <- function(type.name, color.name){ - - if(missing(type.name) || missing(color.name)){ - stop("Missing parameter for types.add function: types.add(type.name, color.name)", call. = FALSE) - } - else{ - # If a global types variable is found, the new definition is queued to the definitions found. - if(exists("types") && (length(types) > 0)){ - types <- types - types.local <- rbind(types, data.frame(type = type.name, color = color.name, stringsAsFactors = FALSE)) - - } - else{ - types.local <- data.frame(type = type.name, color = color.name, stringsAsFactors = FALSE) - } - - # The user is free to leave spaces between each element in the definition, definitions file is more clear this way. - types.local <- trim.types(types.local) - - # The check function perform consistency and correctness checks. - types.local <- check.types(types.local, FALSE) - - cat(paste("Set color value \"", color.name , "\" for events of type \"", type.name, "\"\n", sep ="")) - - assign("types", types.local, envir = .GlobalEnv) - } - - -} \ No newline at end of file diff --git a/R/types.load.R b/R/types.load.R deleted file mode 100644 index b4662dc7..00000000 --- a/R/types.load.R +++ /dev/null @@ -1,99 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -#' @export types.load -#' @title load a set of types from file -#' -#' @description -#' \code{types.load} sets a global data frame 'types' that contains all type definitions found in a specified file or dataset -#' to be validated. -#' -#' @details -#' \code{types.load} allows to load type definitions from a given file path. The file which contains -#' all the definitions must be structured as a csv file. All definitions are couple of values -#' type name and color name as shown below: -#' -#' typeName, colorName -#' ... , ... -#' -#' @seealso \code{\link{types.add}} -#' @param data.input The input file path or a dataset to be validated. - -types.load <- function(data.input){ - - err <- "" - message <- "The definition file contains errors!" - - if(missing(data.input)){ - stop("Missing parameter for types.load function: types.load(data.input)", call. = FALSE) - } - - - - # If a global events variable is found, the new definition is queued to the definitions found. - if(exists("types") && (length(types) > 0)){ - types <- types - - if(is.data.frame(data.input)) - types.file <- data.input - else{ - # If the pathname is correct - if(file.exists(data.input)){ - # Definition file may contain error such as the lack of a comma or columns, a try-catch manages this. - err <- tryCatch( types.file <- suppressWarnings(read.table(data.input, sep = ",", col.names = c("type", "color"), stringsAsFactors = FALSE)), - error = function(e) err <- message) - if(toString(err) == message) - stop(err, call. = FALSE) - }else - stop("File not found!", call. = FALSE) - } - if(nrow(types.file) > 0) - # The new definitions are queued. - types.local <- rbind(types, types.file) - else - stop("Empty set of types at input file path or dataset!", call. = FALSE) - } - else{ - if(is.data.frame(data.input)) - types.local <- data.input - else{ - # If the pathname is correct - if(file.exists(data.input)){ - err <- tryCatch( types.local <- suppressWarnings(read.table(data.input, sep = ",", col.names = c("type", "color"), stringsAsFactors = FALSE)), - error = function(e) err <- message) - if(toString(err) == message) - stop(err, call. = FALSE) - }else - stop("File not found!", call. = FALSE) - } - if(nrow(types.local) == 0) - stop("Empty set of types at input file path or dataset!", call. = FALSE) - } - - # The user is free to leave spaces between each element in the definition, definitions file is more clear this way. - types.local <- trim.types(types.local) - - # The check function perform consistency and correctness checks. - types.local <- check.types(types.local, TRUE) - - for(i in 1:nrow(types.local)) - cat(paste("Set color value \"", types.local[i,"color"] , "\" for events of type \"", types.local[i,"type"], "\"\n", sep ="")) - - assign("types", types.local, envir = .GlobalEnv) - -} \ No newline at end of file diff --git a/R/verify.parents.R b/R/verify.parents.R deleted file mode 100644 index e9b610f9..00000000 --- a/R/verify.parents.R +++ /dev/null @@ -1,56 +0,0 @@ -################################################################################## -# # -# TRONCO: a tool for TRanslational ONCOlogy # -# # -################################################################################## -# Copyright (c) 2014, Marco Antoniotti, Giulio Caravagna, Alex Graudenzi, # -# Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, Giancarlo Mauri, Bud Mishra # -# and Daniele Ramazzotti. # -# # -# All rights reserved. This program and the accompanying materials # -# are made available under the terms of the Eclipse Public License v1.0 # -# which accompanies this distribution, and is available at # -# http://www.eclipse.org/legal/epl-v10.html and in the include COPYING file # -# # -# Initial contributors: # -# Giulio Caravagna, Alex Graudenzi, Mattia Longoni and Daniele Ramazzotti. # -################################################################################## - -#verify the independent progression filter -#INPUT: -#best.parents: best edges to be verified -#marginal.probs: observed marginal probabilities -#joint.probs: observed joint probabilities -#RETURN: -#best.parents: list of the best valid parents -"verify.parents" <- -function(best.parents,marginal.probs,joint.probs) { - #verify the condition for the best parent of each node - for (i in 1:length(best.parents)) { - #if there is a connection, i.e. the node is not already attached to the Root - if(best.parents[i]!=-1) { - #score for the root as the parent of this node - w.root.node = 1/(1+marginal.probs[i]); - #compute the scores for the edges to all the other upstream nodes - attach.to.root = 1; - for (j in 1:length(marginal.probs)) { - #if the connection is valid and the parent node has greater probability - #i.e. it is before the child in temporal order - if(i!=j && marginal.probs[j]>marginal.probs[i]) { - w.parent.node = (marginal.probs[j]/(marginal.probs[i]+marginal.probs[j]))*(joint.probs[i,j]/(marginal.probs[i]*marginal.probs[j])); - #the parent is valid if this condition is valid at least one time (i.e. for at least one of the upstream nodes) - #meaning that if we find out that a connection is not spurious for any node, the best parent is not spurious as well - if(w.root.node<=w.parent.node) { - attach.to.root = 0; - break; - } - } - } - #connect the node to the Root if the flag is true - if(attach.to.root==1) { - best.parents[i] = -1; - } - } - } - return(best.parents); -} diff --git a/R/visualization.R b/R/visualization.R new file mode 100644 index 00000000..f4e7115f --- /dev/null +++ b/R/visualization.R @@ -0,0 +1,2554 @@ +################################################################################## +# # +# TRONCO: a tool for TRanslational ONCOlogy # +# # +################################################################################## +# Copyright (c) 2015, Marco Antoniotti, Giulio Caravagna, Luca De Sano, # +# Alex Graudenzi, Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, # +# Giancarlo Mauri, Bud Mishra and Daniele Ramazzotti. # +# # +# All rights reserved. This program and the accompanying materials # +# are made available under the terms of the GNU GPL v3.0 # +# which accompanies this distribution # +# # +################################################################################## + + +# oncoPrint : plot a genotype +# +#' @title oncoprint +#' @param x A TRONCO compliant dataset +#' @param excl.sort Boolean value, if TRUE sorts samples to enhance exclusivity of alterations +#' @param samples.cluster Boolean value, if TRUE clusters samples (columns). Default FALSE +#' @param genes.cluster Boolean value, if TRUE clusters genes (rows). Default FALSE +#' @param file If not NA write to \code{file} the Oncoprint, default is NA (just visualization). +#' @param ann.stage Boolean value to annotate stage classification, default depends on \code{x} +#' @param ann.hits Boolean value to annotate the number of events in each sample, default is TRUE +#' @param stage.color RColorbrewer palette to color stage annotations. Default is 'YlOrRd' +#' @param hits.color RColorbrewer palette to color hits annotations. Default is 'Purples' +#' @param null.color Color for the Oncoprint cells with 0s, default is 'lightgray' +#' @param border.color Border color for the Oncoprint, default is white' (no border) +#' @param text.cex Title and annotations cex, multiplied by font size 7 +#' @param font.column If NA, half of font.row is used +#' @param font.row If NA, max(c(15 * exp(-0.02 * nrow(data)), 2)) is used, where data is the data +#' visualized in the Oncoprint +#' @param title Oncoprint title, default is as.name(x) - see \code{as.name} +#' @param sample.id If TRUE shows samples name (columns). Default is FALSE +#' @param hide.zeroes If TRUE trims data - see \code{trim} - before plot. Default is FALSE +#' @param legend If TRUE shows a legend for the types of events visualized. Defualt is TRUE +#' @param legend.cex Default 0.5; determines legend size if \code{legend = TRUE} +#' @param cellwidth Default NA, sets autoscale cell width +#' @param cellheight Default NA, sets autoscale cell height +#' @param group.by.label Sort samples (rows) by event label - usefull when multiple events per gene are +#' available +#' @param group.samples If this samples -> group map is provided, samples are grouped as of groups +#' and sorted according to the number of mutations per sample - usefull when \code{data} was clustered +#' @param group.by.stage Default FALSE; sort samples by stage. +#' @param gene.annot Genes'groups, e.g. list(RAF=c('KRAS','NRAS'), Wnt=c('APC', 'CTNNB1')). Default is NA. +#' @param gene.annot.color Either a RColorColorbrewer palette name or a set of custom colors matching names(gene.annot) +#' @param show.patterns If TRUE shows also a separate oncoprint for each pattern. Default is FALSE +#' @param annotate.consolidate.events Default is FALSE. If TRUE an annotation for events to consolidate is shown. +#' @param txt.stats By default, shows a summary statistics for shown data (n,m, |G| and |P|) +#' @param ... other arguments to pass to pheatmap +#' @export oncoprint +#' @importFrom gridExtra grid.arrange +#' @importFrom RColorBrewer brewer.pal brewer.pal.info +#' @importFrom gtable gtable gtable_add_grob gtable_height gtable_width +oncoprint <- function(x, + excl.sort = TRUE, + samples.cluster = FALSE, + genes.cluster = FALSE, + file = NA, + ann.stage = has.stages(x), + ann.hits = TRUE, + stage.color = 'YlOrRd', + hits.color = 'Purples', + null.color = 'lightgray', + border.color = 'white', + text.cex = 1.0, + font.column = NA, + font.row = NA, + title = as.description(x), + sample.id = FALSE, + hide.zeroes = FALSE, + legend = TRUE, + legend.cex = 0.5, + cellwidth = NA, + cellheight = NA, + group.by.label = FALSE, + group.by.stage = FALSE, + group.samples = NA, + gene.annot = NA, + gene.annot.color = 'Set1', + show.patterns = FALSE, + annotate.consolidate.events = FALSE, + txt.stats = paste(nsamples(x),' samples\n', nevents(x), ' events\n', + ngenes(x), ' genes\n', npatterns(x), ' patterns', sep=''), + ...) +{ + + font.size = text.cex * 7 + + + +############## This function sorts a matrix to enhance mutual exclusivity + exclusivity.sort <- function(M) { + geneOrder <- sort(rowSums(M), decreasing=TRUE, index.return=TRUE)$ix; + scoreCol <- function(x) { + score <- 0; + for(i in 1:length(x)) { + if(x[i]) { + score <- score + 2^(length(x)-i); + } + } + return(score); + } + scores <- apply(M[geneOrder, , drop = FALSE ], 2, scoreCol); + sampleOrder <- sort(scores, decreasing=TRUE, index.return=TRUE)$ix; + + res = list() + res$geneOrder = geneOrder + res$sampleOrder = sampleOrder + res$M = M[geneOrder, sampleOrder] + + return(res); + } + +############## Check input data + cat(paste('*** Oncoprint for "', title, '"\nwith attributes: stage=', ann.stage, ', hits=', ann.hits, '\n', sep='')) + is.compliant(x, 'oncoprint', stage=ann.stage) + x = enforce.numeric(x) + +############## If hide.zeros trim x + if (hide.zeroes) { + cat(paste('Trimming the input dataset (hide.zeroes).\n', sep='')) + x = trim(x) + } + +############## Reverse the heatmap under the assumption that ncol(data) << nrow(data) + data = t(x$genotypes) + nc = ncol(data) + nr = nrow(data) + +############## Sort data, if required. excl.sort and group.samples are not compatible + hasGroups = !any(is.na(group.samples)) + + if(group.by.stage && !ann.stage) + stop('Cannot group samples by stage if no stage annotation is provided.') + + if(group.by.stage && excl.sort) + + if(excl.sort && (hasGroups || group.by.stage)) + stop('Disable sorting for mutual exclusivity (excl.sort=FALSE) or avoid using grouped samples.') + +# Exclusivity sort + if(excl.sort && nevents(x) > 1) { + cat(paste('Sorting samples ordering to enhance exclusivity patterns.\n', sep='')) + sorted.data = exclusivity.sort(data) + data = sorted.data$M + } + + if(group.by.stage) + { + ord.stages = as.stages(x)[order(as.stages(x)), , drop = F] + cat('Grouping samples by stage annotation.\n') + + aux.fun = function(samp) { + print(samp) + sub.data = data[, samp, drop= F] + sub.data = sub.data[, order(colSums(sub.data), decreasing = FALSE), drop = F] + return(sub.data) + } + + + new.data = NULL + u.stages = sort(unlist(unique(as.stages(x))), na.last = T) + +#print(str(ord.stages)) + + for(i in u.stages) + { + print(ord.stages[which(ord.stages == i), , drop=F]) + new.data = cbind(new.data, aux.fun(rownames(ord.stages[which(ord.stages == i), , drop= F]))) + } + data = new.data + data = data[order(rowSums(data), decreasing = TRUE), , drop = F ] + } + +# Samples grouping via hasGroups + if(hasGroups) + { + group.samples[,1] = as.character(group.samples[,1]) + grn = rownames(group.samples) + + cat(paste('Grouping samples according to input groups (group.samples).\n', sep='')) + if(any(is.null(grn))) stop('"group.samples" should be matrix with sample names and group assignment.') + + if(!setequal(grn, as.samples(x))) + stop(paste0('Missing group assignment for samples: ', paste(setdiff(as.samples(x), grn), collapse=', '),'.')) + +# Order groups by label, and then data (by column) + order = order(group.samples) + group.samples = group.samples[order, , drop=FALSE] + + data = data[, rownames(group.samples)] + data = data[order(rowSums(data), decreasing = TRUE), , drop = FALSE] + + groups = unique(group.samples[,1]) + + for(i in 1:length(groups)) + { + subdata = data[, group.samples == groups[i], drop = FALSE] + subdata = subdata[, order(colSums(subdata), decreasing = TRUE), drop = FALSE] + data[ , group.samples == groups[i]] = subdata + } + } + +############## If group.by.label group events involving the gene symbol + if (group.by.label) { + cat(paste('Grouping events by gene label, samples will not be sorted.\n', sep='')) + genes = as.genes(x) + data = data[ order(x$annotations[rownames(data), 'event']), ] + } + + cn = colnames(data) + rn = rownames(data) + +############## SAMPLES annotations: hits (total 1s per sample), stage or groups + samples.annotation = data.frame(row.names = cn, stringsAsFactors= F) + nmut = colSums(data) + + if(ann.hits) samples.annotation$hits = nmut + if(ann.stage) samples.annotation$stage = as.stages(x)[cn, 1] + if(hasGroups) samples.annotation$cluster = group.samples[cn, 1] + +############## Color samples annotation + annotation_colors = NULL + +# Color hits + if(ann.hits){ + hits.gradient = (colorRampPalette(brewer.pal(6, hits.color))) (max(nmut)) + annotation_colors = append(annotation_colors, list(hits=hits.gradient)) + } + +# Color stage + if(ann.stage){ + cat('Annotating stages with RColorBrewer color palette', stage.color, '\n') + + different.stages = sort(unique(samples.annotation$stage)) + + stage.color.attr = sample.RColorBrewer.colors(stage.color, length(different.stages)) + stage.color.attr = append(stage.color.attr, "#FFFFFF") + + samples.annotation[is.na(samples.annotation)] = "none" + names(stage.color.attr) = append(different.stages, "none") + annotation_colors = append(annotation_colors, list(stage=stage.color.attr)) + } + +# Color groups + if(hasGroups) { + ngroups = length(unique(group.samples[,1])) + cat('Grouping labels:', paste(unique(group.samples[,1]), collapse=', '), '\n') + + group.color.attr = sample.RColorBrewer.colors('Accent', ngroups) + names(group.color.attr) = unique(group.samples[,1]) + annotation_colors = append(annotation_colors, list(cluster=group.color.attr)) + } + +##### GENES/EVENTS annotations: groups or indistinguishable + genes.annotation = NA + +# Annotate genes groups + if(!all(is.na(gene.annot))) + { + names = names(gene.annot) + + genes.annotation = data.frame(row.names = rn, stringsAsFactors = FALSE) + genes.annotation$group = rep("none", nrow(data)) + + for(i in 1:length(names)) + { + pathway = names[i] + genes.pathway = rownames(as.events(x, genes=gene.annot[[names[i]]])) + genes.annotation[genes.pathway, 'group'] = names[i] + } + + if(length(gene.annot.color) == 1 && gene.annot.color %in% rownames(brewer.pal.info)) + { + cat('Annotating genes with RColorBrewer color palette', gene.annot.color, '.\n') + gene.annot.color = sample.RColorBrewer.colors(gene.annot.color, length(names)) + gene.annot.color = append(gene.annot.color, "#FFFFFF") + } + else{ + if(length(gene.annot.color) != length(names)) + stop('You did not provide enough colors to annotate', length(names), 'genes + Either set gene.annot.color to a valid RColorBrewer palette or provide the explicit correct number of colors.') + + cat('Annotating pathways with custom colors:', paste(gene.annot.color, collapse=', '), '\n') + gene.annot.color = append(gene.annot.color, "#FFFFFF") + } + names(gene.annot.color) = append(names, "none") + gene.annot.color = gene.annot.color[ unique(genes.annotation$group) ] + annotation_colors = append(annotation_colors, list(group=gene.annot.color)) + } + +# Annotate events to consolidate + if(annotate.consolidate.events) + { + cat('Annotating events to consolidate - see consolidate.data\n') + invalid = consolidate.data(x, FALSE) + + genes.annotation$consolidate = 'none' + + genes.annotation[ + unlist( + lapply( + invalid$indistinguishable, + function(z){return(rownames(unlist(z)))} + )), + 'consolidate'] = 'indistinguishable' + + genes.annotation[ unlist(invalid$zeroes), 'consolidate'] = 'missing' + genes.annotation[ unlist(invalid$ones), 'consolidate'] = 'persistent' + + consolidate.colors = c('white', 'brown1', 'darkorange4', 'firebrick4', 'deepskyblue3') + names(consolidate.colors) = c('none', 'indistinguishable', 'missing', 'persistent') + consolidate.colors = consolidate.colors[unique(genes.annotation$consolidate)] + + annotation_colors = append( + annotation_colors, list(consolidate= consolidate.colors) + ) + } + +############## Augment gene names with frequencies + genes.freq = rowSums(data)/nsamples(x) + gene.names = x$annotations[rownames(data),2] + gene.names = paste(round(100 * genes.freq, 0) ,'% ', gene.names, sep='') # row labels + + +############## Augment data to make type-dependent colored plots + data.lifting = function(obj, matrix) + { + types = as.types(obj) + map.gradient = null.color + + for(i in 1:ntypes(obj)) + { + events = as.events(obj, type=as.types(obj)[i]) + keys = rownames(events) + + if (ntypes(obj) > 1) { + keys.subset = keys[unlist(lapply(keys, function(obj, matrix){if (obj %in% matrix) T else F}, rownames(matrix)))] + sub.data = matrix[keys.subset, , drop = FALSE] + +# shift 1s to 'i', add color to the map + idx = which(sub.data == 1) + if(length(idx) > 0) map.gradient = cbind(map.gradient, as.colors(obj)[i]) + + sub.data[idx] = i + matrix[keys.subset, ] = sub.data + + } else { + map.gradient = cbind(map.gradient, as.colors(obj)[i]) + } + } + + map.gradient = c(null.color, as.colors(x)) + names(map.gradient)[1] = 'none' + + return(list(data=matrix, colors=map.gradient)) + } + + pheat.matrix = data.lifting(x,data) + map.gradient = pheat.matrix$colors + data = pheat.matrix$data + +############## Set fontisize col/row + if(is.na(font.row)) + { + font.row = max(c(15 * exp(-0.02 * nrow(data)), 2)) + cat(paste('Setting automatic row font (exponential scaling): ', round(font.row, 1), '\n', sep='')) + } + if(is.na(font.column) && sample.id) + { + font.column = font.row/2 + cat(paste('Setting automatic samples font half of row font: ', round(font.column, 1), '\n', sep='')) + } + +############## Finalizing legends etc. + legend.labels = c('none', unique(x$annotations[,1])) + legend.labels = legend.labels[1:(max(data)+1)] + + if(samples.cluster) cat('Clustering samples and showing dendogram.\n') + if(genes.cluster) cat('Clustering alterations and showing dendogram.\n') + + if(is.null(annotation_colors)) annotation_colors = NA + + if(length(list(...)) > 0) { + cat('Passing the following parameters to TRONCO pheatmap:\n') + print(list(...)) + } + +# if(!is.na(txt.stats)) +# { +# if(npatterns(x) > 0) +# { +# patterns = as.patterns(x) +# +# for(i in 1:length(patterns)) +# { +# genes.patt = as.events.in.patterns(x, patterns[i]) +# txt.stats = paste(txt.stats, '\n\n', +# patterns[i], '\n', +# paste(apply(genes.patt, 1, paste, collapse=' '), collapse='\n') +# ) +# +# } +# } +# } + +############## Real pheatmap + if (ncol(samples.annotation) == 0) { + samples.annotation = NA + } + + ret = pheatmap(data, + scale = "none", + col = map.gradient, + cluster_cols = samples.cluster, + cluster_rows = genes.cluster, + main = title, + fontsize = font.size, + fontsize_col = font.column, + fontsize_row = font.row, + annotation_col = samples.annotation, + annotation_row = genes.annotation, + annotation_colors = annotation_colors, + border_color = border.color, + border=T, + margins = c(10,10), + cellwidth = cellwidth, + cellheight = cellheight, + legend = legend, + legend_breaks = c(0:max(data)), + legend_labels = legend.labels, + legend.cex = legend.cex, + labels_row = gene.names, + drop_levels=T, + show_colnames = sample.id, + filename=file, + txt.stats = txt.stats, + ... + ) + +############## Extra patterns + +patt.table = gtable(widths = unit(c(7, 2), "null"), height = unit(c(2, 7), "null")) + +patt.table = list() +map.gradient = map.gradient[names(map.gradient) != 'Pattern'] + +#print(data) +if(npatterns(x) > 0 && show.patterns) +{ + cat('Plotting also', npatterns(x), 'patterns\n') + patterns = as.patterns(x) + + for(i in 1:length(patterns)) + { + genes.patt = as.events.in.patterns(x, patterns[i]) + genes.patt.genos = data[rownames(genes.patt), , drop = F] + + + genes.patt.genos.gtable = pheatmap( + exclusivity.sort(genes.patt.genos)$M, + scale = "none", + col = map.gradient, + cluster_cols = FALSE, + cluster_rows = FALSE, + main = paste('Pattern:', patterns[i]), + fontsize = font.size * .75, + fontsize_col = font.column * .75, + fontsize_row = font.row * .75, +#annotation_row = genes.annotation, + annotation_colors = annotation_colors, + annotation_legend = FALSE, + border_color = border.color, + border = T, + cellwidth = 6, + cellheight = 6, + legend = FALSE, + labels_row = genes.patt[rownames(genes.patt.genos), 'event'], + drop_levels=T, + show_colnames = FALSE, + silent = T, + ...)$gtable + +#patt.table = gtable_add_grob(patt.table, genes.patt.genos.gtable, 2, 1) + patt.table = append(patt.table, list(genes.patt.genos.gtable)) + + } + + str = paste( + 'grid.arrange(ret$gtable, arrangeGrob(', + paste('patt.table[[', 1:length(patt.table), ']],', collapse = ''), + 'ncol = 1), ncol = 1, heights = c(4,', paste(rep(1, length(patt.table) ), collapse=', '), '))') + + str = paste( + 'grid.arrange(ret$gtable, arrangeGrob(', + paste('patt.table[[', 1:length(patt.table), ']],', collapse = ''), + 'ncol = 1), ncol = 1, heights = c(', paste(rep(1, length(patt.table) + 1 ), collapse=', '), '))') + + + eval(parse(text = str)) +} + + +# print(ncol(ret$gtable)) +# print(ncol(patt.table)) + +#ret$gtable = rbind(ret$gtable, patt.table) +#ret$gtable = gtable_add_grob(ret$gtable, patt.table, 1 , 1) +#grid.newpage() +#grid.draw(ret$gtable) + + + +return(ret) +} + + +##### Pathway print +#' Visualise pathways informations +#' @title pathway.visualization +#' +#' @param x A TRONCO complian dataset +#' @param title Plot title +#' @param file To generate a PDF a filename have to be given +#' @param pathways.color A RColorBrewer color palette +#' @param aggregate.pathways todo +#' @param pathways todo +#' @param ... todo +#' @return plot information +#' @export pathway.visualization +pathway.visualization = function(x, + title = paste('Pathways:', paste(names(pathways), collapse=', ', sep='')), + file, + pathways.color = 'Set2', + aggregate.pathways, + pathways, + ...) +{ + names = names(pathways) + + if(length(pathways.color) == 1 && pathways.color %in% rownames(brewer.pal.info)) + { + cat('Annotating pathways with RColorBrewer color palette', pathways.color, '.\n') + pathway.colors = brewer.pal(n=length(names), name=pathways.color) + } + else{ + print(pathways.color) + if(length(pathways.color) != length(names)) + stop('You did not provide enough colors to annotate ', length(names), ' pathways. + Either set pathways.color to a valid RColorBrewer palette or provide the explicit correct number of colors.') + + cat('Annotating pathways with custom colors', paste(pathways.color, collapse=','), '.\n') + pathway.colors = pathways.color + } + + names(pathway.colors) = names + + cat(paste('*** Processing pathways: ', paste(names, collapse=', ', sep=''), '\n', sep='')) + + cat(paste('\n[PATHWAY \"', names[1],'\"] ', paste(pathways[[1]], collapse=', ', sep=''), '\n', sep='')) + + data.pathways = as.pathway(x, pathway.genes=pathways[[1]], + pathway.name=names[1], aggregate.pathway = aggregate.pathways) + + data.pathways = change.color(data.pathways, 'Pathway', pathways.color[1]) + data.pathways = rename.type(data.pathways, 'Pathway', names[1]) + + if(length(names) > 1) + { + for(i in 2:length(pathways)) + { + cat(paste('\n\n[PATHWAY \"', names[i],'\"] ', paste(pathways[[i]], collapse=', ', sep=''), '\n', sep='')) + pathway = as.pathway(x, pathway.genes=pathways[[i]], pathway.name=names[i], aggregate.pathway = aggregate.pathways) + + pathway = change.color(pathway, 'Pathway', pathway.colors[i]) + pathway = rename.type(pathway, 'Pathway', names[i]) + +# show(pathway) +# # print(has.stages(pathway)) +# print('bindo..') +# print(has.stages(data.pathways)) + + data.pathways = ebind(data.pathways, pathway) +# show(data.pathways) + } + } + +# data.pathways = enforce.numeric(data.pathways) +# show(data.pathways) + ret = oncoprint(trim(data.pathways), title=title, file=file, ...) + + return(ret) +} + + +# #### Consensus matrix (intra-clusters) +# oncoprint.consensus = function(models, MIN.HITS = 0) +# { + + +# smaller.to.bigger = function(m,cn) +# { +# x = matrix(0, nrow = length(cn), ncol = length(cn)) +# rownames(x) = cn +# colnames(x) = cn + + +# for(i in 1:nrow(m)) +# for(j in 1:nrow(m)) +# x[rownames(m)[i], rownames(m)[j]] = ifelse(m[i,j] == 1, 1, 0) +# return(x) +# } + + +# # All the adjacency matrices +# matrices = list() +# for(i in 1:length(models)) +# matrices = append(matrices, list(models[[i]]$adj.matrix$adj.matrix.bic)) + +# # All their colnames - all possible events and types +# cn = unique(Reduce(union, lapply(matrices, colnames))) + +# all.events = NULL +# for(i in 1:length(models)) all.events = rbind(all.events, as.events(models[[i]]$data)) +# all.events = unique(all.events) + +# all.types = NULL +# for(i in 1:length(models)) all.types = rbind(all.types, models[[i]]$data$types) +# all.types = unique(all.types) + +# # Consensus + overall adjacency matrix +# consensus = Reduce('+', lapply(matrices, smaller.to.bigger, cn=cn)) +# adjacency = consensus +# adjacency[adjacency < MIN.HITS] = 0 +# adjacency[adjacency > 1] = 1 + +# cat('Consensus adjacency matrix:', nrow(adjacency), 'x', ncol(adjacency), ', minimum consensus', MIN.HITS, '\n') + + + +# keys = Reduce(rbind, lapply(models, as.events)) + +# types = data.frame(event=rep('pattern', nrow(super.model)), +# stringsAsFactors = F) + +# labels = data.frame(name=rep('none', nrow(super.model)), +# stringsAsFactors = F) + +# pattern.type = data.frame(pattern=rep('NA', nrow(super.model)), +# stringsAsFactors = F) + + +# for(i in 1:length(rownames(super.model))) +# { +# types$event[i] = keys[rownames(super.model)[i], 'type'] + +# labels$name[i] = keys[rownames(super.model)[i], 'event'] + +# prefix = gsub("_.*$", "", labels$name[i]) +# prefix = gsub( "\"","", prefix) + +# if(prefix %in% c('AND', 'OR', 'XOR')) +# { +# pattern.type$pattern[i] = prefix +# if(prefix == 'AND') pattern.type$pattern[i] = 'co-occurrence' +# if(prefix == 'OR') pattern.type$pattern[i] = 'soft exclusivity' +# if(prefix == 'XOR') pattern.type$pattern[i] = 'hard exclusivity' + +# compact.label = strsplit(labels$name[i], '_' )[[1]] +# labels$name[i] = paste( +# compact.label[2:length(compact.label)], +# collapse = ' / ') +# } +# rownames(super.model)[i] = +# paste( +# keys[rownames(super.model)[i], 'event'], +# keys[rownames(super.model)[i], 'type']) +# } + +# colnames(super.model) = rownames(super.model) +# rownames(types) = rownames(super.model) +# rownames(labels) = rownames(super.model) +# rownames(pattern.type) = rownames(super.model) + +# types[types == 'Hypothesis'] = 'Pattern' + +# ########### Filetering for recurrrent +# view = super.model +# view = view[apply(view, 1, max) > MIN,] +# view = view[, apply(view, 2, max) > MIN] + +# ########### Ordering according to the number of times a relation is found +# group.order = function(x, k, margin) +# { +# ord = which(apply(x, margin, max) == k) + +# if(margin == 1) +# { +# to.sort = x[ord, , drop = F] +# to.sort = to.sort[order(rowSums(to.sort), decreasing = T), , drop = F] +# rownames(x)[ord] = rownames(to.sort) +# x[ord, ] = to.sort +# } +# else +# { +# to.sort = x[, ord , drop = F] +# to.sort = to.sort[, order(colSums(to.sort), decreasing = T), drop = F] +# colnames(x)[ord] = colnames(to.sort) +# x[, ord] = to.sort +# } + +# return(x) +# } + +# selects.recurr = order(apply(view, 1, max), decreasing = T) +# view = view[selects.recurr, ] + +# max.values = apply(view, 1, max) +# for(i in 1:length(unique(max.values))) +# view = group.order(view, unique(max.values)[i], 1) + +# selected.recurr = order(apply(view, 2, max), decreasing = T) +# view = view[, selected.recurr] + +# max.values = apply(view, 2, max) +# for(i in 1:length(unique(max.values))) +# view = group.order(view, unique(max.values)[i], 2) + +# gaps.row = match( +# unique(apply(view, 1, max)), +# apply(view, 1, max)) - 1 + +# gaps.col = match( +# unique(apply(view, 2, max)), +# apply(view, 2, max)) - 1 + + +# ############ +# view.ones = view +# view.ones[view.ones > 1] = 1 +# ann.row = data.frame(selects=rowSums(view.ones), +# row.names = rownames(view), +# stringsAsFactors = F) + +# # ann.row$selects = .bincode(ann.row$selects, +# # c(0, +# # max(ann.row$selects) / 4, +# # max(ann.row$selects) / 2, +# # max(ann.row$selects) * 3 / 4, +# # max(ann.row$selects)), +# # TRUE) + + +# ann.row = cbind( +# types[rownames(ann.row), , drop = FALSE], +# pattern.type[rownames(ann.row), , drop = FALSE], +# ann.row) + +# ann.col = data.frame(selected=colSums(view.ones), +# row.names = colnames(view), +# stringsAsFactors = F) + +# ann.col = cbind( +# types[rownames(ann.col), , drop = FALSE], +# pattern.type[rownames(ann.col), , drop = FALSE], +# ann.col) + +# selection = max(max(ann.row$selects), max(ann.col$selected)) +# selection.palette = brewer.pal(9, 'PuBuGn')[4:9] +# # selection.palette = colorRampPalette(selection)(selection) + +# ann.colors = list( +# event = as.colors(Cluster3.methylation_subtype.aic$data), +# pattern = c('red', 'darkgreen', 'orange', 'white'), +# # selects = colorRampPalette(brewer.pal(9, 'PuBuGn'))(max(ann.row$selects) + 1), +# # selected = colorRampPalette(brewer.pal(9, 'PuBuGn'))(max(ann.col$selected) + 1) +# # selects = selection.palette, +# # selected = selection.palette +# selects = colorRampPalette(selection.palette)(max(ann.row$selects) + 1), +# selected = colorRampPalette(selection.palette)(max(ann.col$selected) + 1) +# ) +# ann.colors$event['Hypothesis'] = 'white' +# names(ann.colors$event)[4] = 'Pattern' +# names(ann.colors$pattern) = c('hard exclusivity', 'co-occurrence', 'soft exclusivity', 'NA') + +# print(ann.colors$selects) + +# map.color = colorRampPalette(brewer.pal(3, 'YlOrBr'))(max(super.model) + 1) +# # map.color = brewer.pal(max(super.model) + 1, 'YlOrBr') +# map.color[1] = 'gray92' + +# map.nozeroes = view +# map.nozeroes[view == 0] = '' + +# pheatmap(view, +# main = 'Selectivity relations (intra-clusters consensus)', +# scale = 'none', +# color = map.color, +# font.size=4, +# fontsize_col = 6, +# fontsize_row = 6, +# annotation_row = ann.row, +# annotation_col = ann.col, +# annotation_colors = ann.colors, +# fontsize_number = 6, +# cluster_rows = F, +# cluster_cols = F, +# labels_row = labels[rownames(view), ], +# labels_col = labels[colnames(view), ], +# number_format = '%d', +# display_numbers = map.nozeroes, +# gaps_col = gaps.col, +# gaps_row = gaps.row, +# border_color = 'lightgray' +# ) +# } + +#' export input for cbio visualization at http://www.cbioportal.org/public-portal/oncoprinter.jsp +#' @title oncoprint.cbio +#' +#' @examples +#' data(gistic) +#' gistic = import.GISTIC(gistic) +#' oncoprint.cbio(gistic) +#' +#' @param x A TRONCO compliant dataset. +#' @param file name of the file where to save the output +#' @param hom.del type of Homozygous Deletion +#' @param het.loss type of Heterozygous Loss +#' @param gain type of Gain +#' @param amp type of Amplification +#' @return A file containing instruction for the CBio visualization Tool +#' @export +oncoprint.cbio <- function(x, + file='oncoprint-cbio.txt', + hom.del = 'Homozygous Loss', + het.loss = 'Heterozygous Loss', + gain = 'Low-level Gain', + amp = 'High-level Gain') + +{ + is.compliant(x) + +# r = paste(paste(rownames(x$genotypes)), x$annotations[x$annotations[,''],], 'xxx') + r = 'Sample\tGene\tAlteration\n' + for(i in 1:nrow(x$genotypes)) + { + for(j in 1:ncol(x$genotypes)) + { + if(x$genotypes[i,j] == 1) + { + s = rownames(x$genotypes)[i] + g = x$annotations[colnames(x$genotypes)[j], 'event'] + + t = x$annotations[colnames(x$genotypes)[j], 'type'] + + t.o = 'xxx' + if(t == hom.del) t.o = 'HOMDEL' + if(t == het.loss) t.o = 'HETLOSS' + if(t == gain) t.o = 'GAIN' + if(t == amp) t.o = 'AMP' + +# cat(paste( s, g, t.o, '\n', sep=' ', collpase='')) + r = paste(r, s, '\t', g, '\t', t.o, '\n', sep='', collpase='') + } + } + + } + + write(r, file) +} + + +#' Generate PDF and laex tables +#' @title genes.table.report +#' +#' @param x A TRONCO compliant dataset. +#' @param name filename +#' @param dir working directory +#' @param maxrow maximum number of row per page +#' @param font document fontsize +#' @param height table height +#' @param width table width +#' @param fill fill color +#' @return LaTEX code +#' @importFrom gridExtra grid.table +#' @importFrom xtable xtable +#' @export genes.table.report +genes.table.report = function(x, name, dir=getwd(), maxrow=33, + font=10, height=11, width=8.5, fill="lightblue") +{ +# Print table with gridExtra and xtables + print.table = function(table, name, dir=getwd(), maxrow, font, height, + width, fill) + { + cat('\nPrinting PDF and Latex table to files: \n') + cat(paste('PDF \t\t', dir, '/', name, '.genes-table.pdf\n', sep='')) + cat(paste('Latex\t\t', dir, '/', name, '.genes-table.tex\n', sep='')) + cat('\n') + +# output pdf +# require(gridExtra) +# require(xtable) + + cur.dev = dev.cur() + + pdf(file=paste(dir, '/', name, '.genes-table.pdf', sep=''), height=height, width=width) + +# max rows per page + npages = ceiling(nrow(table)/maxrow); + + flush.console() + + pb = txtProgressBar(1, npages, style = 3); + for (i in 1:npages) + { + setTxtProgressBar(pb, i) + idx = seq(1+((i-1)*maxrow), i*maxrow); + + if(max(idx) > nrow(table)) idx = idx[idx < nrow(table)] + + grid.newpage() + grid.table(table[idx, ], + gpar.coretext = gpar(fontsize = font), + gpar.corefill = gpar(fill = fill, alpha=0.5, col = NA), + h.even.alpha = 0.5) + } + close(pb) + +# output latex + print(xtable(table, digits=0), file=paste(dir, '/', name, '.genes-table.tex', sep=''), type='latex') + + dev.off() + dev.set(which=cur.dev) + } + + cat(paste('Preparing output table with ', ngenes(x),' genes ...\n')) + genes = as.genes(x) + types = as.types(x) + + data = matrix(0, nrow=ngenes(x), ncol=ntypes(x)) + + genes.table = data.frame(data, row.names=genes, stringsAsFactors=FALSE) + colnames(genes.table) = types + + x = enforce.numeric(x) + + pb = txtProgressBar(1, ngenes(x), style = 3); + for(i in 1:ngenes(x)) + { + setTxtProgressBar(pb, i) + g = as.gene(x, genes=genes[i]) + + if(ncol(g) > 0) + { + gg = colSums(apply(g, 2, as.numeric)) + + genes.table[rownames(genes.table)[i], colnames(g)] = gg + genes.table[rownames(genes.table)[i], 'Alterations'] = paste( round(sum(gg) / nsamples(x) * 100), '%', sep='') + genes.table[rownames(genes.table)[i], 'Frequency'] = sum(gg) / nsamples(x) + } + + } +# close progress bar + close(pb) + + + genes.table = genes.table[order(genes.table$Frequency, decreasing = TRUE), ] + genes.table$Frequency = NULL + + print.table(table=genes.table, name=name, dir=getwd(), maxrow=maxrow, font=font, height=height, + width=width, fill=fill) + + return(genes.table) +} + + + +#' Generates stacked histogram +#' @title genes.table.plot +#' +#' @param x A TRONCO compliant dataset +#' @param name filename +#' @param dir where to save the file +#' @importFrom reshape2 melt +#' @importFrom ggplot2 ggplot geom_bar +#' @export genes.table.plot +genes.table.plot = function(x, name, dir=getwd()) +{ + + + + cat('Preparing output table: creating alterations profiles and selecting events with minimum frequency.\n') + alterations = sort.by.frequency(as.alterations(x)) + + cat('Stacked histogram with genes in the following order (head): ') + cat(head(as.genes(alterations))) + cat('\nRetrieving all events for the above genes.\n') + + y = events.selection(x, filter.in.names=as.genes(alterations)) + y = enforce.numeric(y) +# show(y) + +# print(ntypes(y)) +# print(ngenes(y)) + + table = matrix(rep(0, ntypes(y) * ngenes(y)), ncol=ntypes(y), nrow=ngenes(y)) + + rownames(table) = 1:ngenes(alterations) + colnames(table) = as.types(y) + + cat('Populating histogram table.\n') + pb = txtProgressBar(1, ngenes(alterations), style = 3); + for(i in 1:ngenes(alterations)) + { + setTxtProgressBar(pb, i) + + g = colSums(as.gene(y, gene=as.genes(alterations)[i])) + table[i, ] = g + } + close(pb) + + table = cbind(Rank=as.genes(alterations), table) + table.melt = melt(as.data.frame(table), id.var='Rank') + +#print(table) +#print(table.melt) + + +# Problem, does not work well - can't assign colors in as.colors(y) + p = ggplot(table.melt, aes(x = Rank, y = value, fill = variable)) + p + geom_bar(stat = "identity") + print(p) + + return(table.melt) +} +# # cat('Latex tables (genes)') +# genes.table.input = genes.report(input) +# genes.table.sub1 = genes.report(sub1) +# genes.table.sub2 = genes.report(sub2) +# genes.table.sub3 = genes.report(sub3) +# genes.table.sub4 = genes.report(sub4) +# genes.table.nh = genes.report(non.hyper) +# +# idx = rownames(genes.table.input) +# full.table = cbind(TOT=genes.table.input[idx, 'Freq'], genes.table.sub1[idx, ]) +# full.table = cbind( +# full.table, genes.table.sub2[idx, c('Amps', 'Dels', 'SNVs', 'Freq')], +# genes.table.sub3[idx, c('Amps', 'Dels', 'SNVs', 'Freq')], +# genes.table.sub4[idx, c('Amps', 'Dels', 'SNVs','Freq')]) +# +# print.table(full.table, 'Full-table', font = 10, width = 20) +# print.table(pathway.genes.df, 'genes-list') + + +# Calculate the likert +# +# @importFrom likert likert +# @param cluster_result Clustering result eg: [1, 2, 1, 3 ,3] +# @param sample_stage Stage in which the sample is eg: [3, 3, 1, 2 ,2] +# @param cluster_prefix Prefix to prefend to cluster data +# @param sample_prefix Prefix to prefend to stage data +# @export likertToClus +likertToClus <- function(cluster_result, sample_stage, cluster_prefix='', sample_prefix=''){ + + + +# check different value + cluster <- sort(unique(cluster_result), decreasing = T) + stage <- sort(unique(sample_stage), na.last = T) + +# print('') + +# create label ['1', '2'] + prefix='k' -> ['K1', 'K2'] + cluster_label <- paste(cluster_prefix, cluster, sep='') + stage_label <- paste(sample_prefix, stage, sep='') + +# create factor based on clustering and stage data + factor_c <- factor(cluster_result, labels = cluster_label, exclude = NULL) + factor_s <- factor(sample_stage, labels = stage_label, exclude = NULL) + +# data frame c1->stage, c2->cluster + likert_df <- data.frame(factor_s, factor_c) + col <- likert_df[,1, drop=F] +# calc likert + result <- likert(col, grouping = likert_df$factor_c) + result +} + + +#' @importFrom RColorBrewer brewer.pal +"cluster.sensitivity" <- function(cluster.map, reference, stages=NA, file=NA) { + + + if(ncol(cluster.map) == 1) stop('No clustering stability for a unique clustering map!') + + + if(!reference %in% colnames(cluster.map)) + stop(paste0('The reference cluster specified is not any of: ', + paste(colnames(cluster.map), collapse=', '), '.')) + + ref.clust = which(reference == colnames(cluster.map)) + colnames(cluster.map)[ref.clust] = paste(colnames(cluster.map)[ref.clust],' [reference]',sep='') + +# Transpose data +# cluster.map = t(cluster.map) + +# Sort data according to reference row + cluster.map = cluster.map[sort(cluster.map[, ref.clust ], decreasing=FALSE, index.return=TRUE)$ix, ]; + +# Get unique clustering IDs + id = apply(cluster.map, 2, unique) + id = unique(unlist(id)) + + print(apply(cluster.map, 2, unique)) + +# Compute the clustering score + subdata = cluster.map[, -ref.clust] + refcol = cluster.map[, ref.clust] + urefcol = unique(refcol) + + cat('Found the following cluster labels:', urefcol, '\n') + + cat('Computing clustering scores ... ') + + score = rep(0, nrow(cluster.map)) + + for(i in 1:length(urefcol)) + { + tmp = as.matrix(subdata[which(refcol==i), ]); + + curr.score = 0; + for (j in 1:ncol(tmp)) + { + curr.cardinality = sort(table(tmp[,j]),decreasing=TRUE)[[1]]; + curr.score = curr.score + (nrow(tmp) - curr.cardinality)/nrow(tmp); + } + + score[which(refcol==i)] = 1 - (curr.score/nrow(tmp)) + } + cat('DONE\n') + + +# Create annotations + cn = rownames(cluster.map) + + annotation = data.frame(sensitivity=score, row.names=cn, stringsAsFactors=FALSE) + + if(!all(is.na(stages))) + annotation$stage = stages[cn,1] + +# Create colors + col = brewer.pal(n = length(id), name = 'Set1') + + different.stages = sort(unique(annotation$stage)) + num.stages = length(different.stages) + + stage.color = append(brewer.pal(n = num.stages, name = 'YlOrRd'), '#FFFFFF') + names(stage.color) = append(levels(as.factor(different.stages)), NA) + + score.color = brewer.pal(n = 3, name = 'Greys') + +# Annotation colors + annotation_colors = list(stage=stage.color, sensitivity=score.color) + +# Settings + main = paste0("Clustering Sensitivity\n Reference : ", + reference,'\nAgainst : ', paste(colnames(subdata), collapse=', ')) + + + cat('Clustering rows in', nrow(cluster.map), 'clusters.\n') + order = sort(cluster.map[,ref.clust], decreasing=FALSE, index.return=TRUE) + + + + pheatmap(cluster.map, + scale = "none", + cluster_col= F, + cluster_rows = F, + col=col, + main=main, + fontsize=6, + fontsize_col=8, + fontsize_row = 2, + annotation_row = annotation, + annotation_colors = annotation_colors, + border_color='lightgray', + border=T, + margins=c(10,10), + cellwidth = 25, + cellheight = 2.2, +# legend=T, + legend_breaks = 1:4, + filename=file, + gaps_col = 1:3, +# display_numbers = T, + cutree_rows = nrow(cluster.map), + gaps_row = (match(urefcol, order$x) - 1) +# number_format = '%d', + ) + + return(cluster.map) + } + + lo = function(rown, coln, nrow, ncol, cellheight = NA, cellwidth = NA, treeheight_col, treeheight_row, legend, annotation_row, annotation_col, annotation_colors, annotation_legend, main, fontsize, fontsize_row, fontsize_col, gaps_row, gaps_col, ...){ +# Get height of colnames and length of rownames + if(!is.null(coln[1])){ + t = c(coln, colnames(annotation_row)) + longest_coln = which.max(strwidth(t, units = 'in')) + gp = list(fontsize = fontsize_col, ...) + coln_height = unit(1, "grobheight", textGrob(t[longest_coln], rot = 90, gp = do.call(gpar, gp))) + unit(10, "bigpts") + } + else{ + coln_height = unit(5, "bigpts") + } + + if(!is.null(rown[1])){ + t = c(rown, colnames(annotation_col)) + longest_rown = which.max(strwidth(t, units = 'in')) + gp = list(fontsize = fontsize_row, ...) + rown_width = unit(1, "grobwidth", textGrob(t[longest_rown], gp = do.call(gpar, gp))) + unit(10, "bigpts") + } + else{ + rown_width = unit(5, "bigpts") + } + + gp = list(fontsize = fontsize, ...) +# Legend position + if(!is.na(legend[1])){ + longest_break = which.max(nchar(names(legend))) + longest_break = unit(1.1, "grobwidth", textGrob(as.character(names(legend))[longest_break], gp = do.call(gpar, gp))) + title_length = unit(1.1, "grobwidth", textGrob("Scale", gp = gpar(fontface = "bold", ...))) + legend_width = unit(12, "bigpts") + longest_break * 1.2 + legend_width = max(title_length, legend_width) + } + else{ + legend_width = unit(0, "bigpts") + } + +# Set main title height + if(is.na(main)){ + main_height = unit(0, "npc") + } + else{ + main_height = unit(1.5, "grobheight", textGrob(main, gp = gpar(fontsize = 1.3 * fontsize, ...))) + } + +# Column annotations + textheight = unit(fontsize, "bigpts") + + if(!is.na(annotation_col[[1]][1])){ +# Column annotation height + annot_col_height = ncol(annotation_col) * (textheight + unit(2, "bigpts")) + unit(2, "bigpts") + +# Width of the correponding legend + t = c(as.vector(as.matrix(annotation_col)), colnames(annotation_col)) + annot_col_legend_width = unit(1.2, "grobwidth", textGrob(t[which.max(nchar(t))], gp = gpar(...))) + unit(12, "bigpts") + if(!annotation_legend){ + annot_col_legend_width = unit(0, "npc") + } + } + else{ + annot_col_height = unit(0, "bigpts") + annot_col_legend_width = unit(0, "bigpts") + } + +# Row annotations + if(!is.na(annotation_row[[1]][1])){ +# Row annotation width + annot_row_width = ncol(annotation_row) * (textheight + unit(2, "bigpts")) + unit(2, "bigpts") + +# Width of the correponding legend + t = c(as.vector(as.matrix(annotation_row)), colnames(annotation_row)) + annot_row_legend_width = unit(1.2, "grobwidth", textGrob(t[which.max(nchar(t))], gp = gpar(...))) + unit(12, "bigpts") + if(!annotation_legend){ + annot_row_legend_width = unit(0, "npc") + } + } + else{ + annot_row_width = unit(0, "bigpts") + annot_row_legend_width = unit(0, "bigpts") + } + + annot_legend_width = max(annot_row_legend_width, annot_col_legend_width) + +# Tree height + treeheight_col = unit(treeheight_col, "bigpts") + unit(5, "bigpts") + treeheight_row = unit(treeheight_row, "bigpts") + unit(5, "bigpts") + +# Set cell sizes + if(is.na(cellwidth)){ + mat_width = unit(1, "npc") - rown_width - legend_width - treeheight_row - annot_row_width - annot_legend_width + } + else{ + mat_width = unit(cellwidth * ncol, "bigpts") + length(gaps_col) * unit(4, "bigpts") + } + + if(is.na(cellheight)){ + mat_height = unit(1, "npc") - main_height - coln_height - treeheight_col - annot_col_height + } + else{ + mat_height = unit(cellheight * nrow, "bigpts") + length(gaps_row) * unit(4, "bigpts") + } + +# Produce gtable + gt = gtable( + widths = unit.c( + treeheight_row, + annot_row_width, + mat_width, + rown_width, + legend_width, + annot_legend_width), + heights = unit.c( + main_height, + treeheight_col, + annot_col_height, + mat_height, + coln_height), + vp = viewport(gp = do.call(gpar, gp))) + +#print(unit.c(main_height, treeheight_col, annot_col_height, mat_height, coln_height)) + + cw = convertWidth(mat_width - (length(gaps_col) * unit(4, "bigpts")), "bigpts", valueOnly = T) / ncol + ch = convertHeight(mat_height - (length(gaps_row) * unit(4, "bigpts")), "bigpts", valueOnly = T) / nrow + +# Return minimal cell dimension in bigpts to decide if borders are drawn + mindim = min(cw, ch) + + res = list(gt = gt, mindim = mindim) + + return(res) + } + + find_coordinates = function(n, gaps, m = 1:n){ + if(length(gaps) == 0){ + return(list(coord = unit(m / n, "npc"), size = unit(1 / n, "npc") )) + } + + if(max(gaps) > n){ + stop("Gaps do not match with matrix size") + } + + size = (1 / n) * (unit(1, "npc") - length(gaps) * unit("4", "bigpts")) + + gaps2 = apply(sapply(gaps, function(gap, x){x > gap}, m), 1, sum) + coord = m * size + (gaps2 * unit("4", "bigpts")) + + return(list(coord = coord, size = size)) + } + + draw_dendrogram = function(hc, gaps, horizontal = T){ + h = hc$height / max(hc$height) / 1.05 + m = hc$merge + o = hc$order + n = length(o) + + m[m > 0] = n + m[m > 0] + m[m < 0] = abs(m[m < 0]) + + dist = matrix(0, nrow = 2 * n - 1, ncol = 2, dimnames = list(NULL, c("x", "y"))) + dist[1:n, 1] = 1 / n / 2 + (1 / n) * (match(1:n, o) - 1) + + for(i in 1:nrow(m)){ + dist[n + i, 1] = (dist[m[i, 1], 1] + dist[m[i, 2], 1]) / 2 + dist[n + i, 2] = h[i] + } + + draw_connection = function(x1, x2, y1, y2, y){ + res = list( + x = c(x1, x1, x2, x2), + y = c(y1, y, y, y2) + ) + + return(res) + } + + x = rep(NA, nrow(m) * 4) + y = rep(NA, nrow(m) * 4) + id = rep(1:nrow(m), rep(4, nrow(m))) + + for(i in 1:nrow(m)){ + c = draw_connection(dist[m[i, 1], 1], dist[m[i, 2], 1], dist[m[i, 1], 2], dist[m[i, 2], 2], h[i]) + k = (i - 1) * 4 + 1 + x[k : (k + 3)] = c$x + y[k : (k + 3)] = c$y + } + + x = find_coordinates(n, gaps, x * n)$coord + y = unit(y, "npc") + + if(!horizontal){ + a = x + x = unit(1, "npc") - y + y = unit(1, "npc") - a + } + res = polylineGrob(x = x, y = y, id = id) + + return(res) + } + + draw_matrix = function(matrix, border_color, gaps_rows, gaps_cols, fmat, fontsize_number, number_color){ + n = nrow(matrix) + m = ncol(matrix) + + coord_x = find_coordinates(m, gaps_cols) + coord_y = find_coordinates(n, gaps_rows) + + x = coord_x$coord - 0.5 * coord_x$size + y = unit(1, "npc") - (coord_y$coord - 0.5 * coord_y$size) + + coord = expand.grid(y = y, x = x) + + res = gList() + + res[["rect"]] = rectGrob(x = coord$x, y = coord$y, width = coord_x$size, height = coord_y$size, gp = gpar(fill = matrix, col = border_color)) + + if(attr(fmat, "draw")){ + res[["text"]] = textGrob(x = coord$x, y = coord$y, label = fmat, gp = gpar(col = number_color, fontsize = fontsize_number)) + } + + res = gTree(children = res) + + return(res) + } + + draw_colnames = function(coln, gaps, ...){ + coord = find_coordinates(length(coln), gaps) + x = coord$coord - 0.5 * coord$size + + res = textGrob(coln, x = x, y = unit(1, "npc") - unit(3, "bigpts"), vjust = 0.5, hjust = 0, rot = 270, gp = gpar(...)) + + return(res) + } + + draw_rownames = function(rown, gaps, ...){ + coord = find_coordinates(length(rown), gaps) + y = unit(1, "npc") - (coord$coord - 0.5 * coord$size) + + res = textGrob(rown, x = unit(3, "bigpts"), y = y, vjust = 0.5, hjust = 0, gp = gpar(...)) + + return(res) + } + + draw_legend = function(color, breaks, txt.stats, legend.cex, legend, ...){ + + + height = min(unit(1 * legend.cex, "npc"), unit(150 * legend.cex, "bigpts")) + +# print('*****') +#print(height) +# print('*****') + +# print(breaks) + +# print('dar_legend') + legend_pos = (legend - min(breaks)) / (max(breaks) - min(breaks)) + legend_pos = height * legend_pos + (unit(1, "npc") - height) + + breaks = (breaks - min(breaks)) / (max(breaks) - min(breaks)) + breaks = height * breaks + (unit(1, "npc") - height) + +# print(breaks) +# print(legend_pos[length(legend_pos)]) +# print(breaks[-length(breaks)]) + + h = breaks[-1] - breaks[-length(breaks)] + +# print('***** br') +# print(breaks) +# print('*****') + +# print(breaks[-length(breaks)]+unit(1, "npc")) + +# print('***** h') +# print(h) +# print('*****') +# h = cellheight + + rect = rectGrob(x = 0, y = breaks[-length(breaks)], width = unit(10, "bigpts"), height = h, hjust = 0, vjust = 0, gp = gpar(fill = color, col = "#FFFFFF00")) + text = textGrob(names(legend), x = unit(14, "bigpts"), y = legend_pos, hjust = 0, gp = gpar(...)) + +# y.stats = breaks[-length(breaks)]-unit(.1, "npc") +# stats = rectGrob(x = 0, y = y.stats, width = unit(10, "bigpts"), height = h, hjust = 0, vjust = 0, gp = gpar(fill ='black')) + + + if(!is.na(txt.stats)) + { + +# rect = rectGrob(x = 0, +# y = breaks[-length(breaks)], +# width = unit(10, "bigpts"), height = h, hjust = 0, vjust = 0, gp = gpar(fill = color, col = "red")) + + + crlf = strsplit(txt.stats, split = "\\n")[[1]] + h = length(crlf) / 6 + + stats = textGrob(txt.stats, x = unit(2, "bigpts"), + y = legend_pos[1] - unit(2, "cm"), + hjust = 0, gp = gpar(fontface='bold')) + + + res = grobTree(rect, text, stats) + + + } + else + res = grobTree(rect, text) + + return(res) + } + + convert_annotations = function(annotation, annotation_colors){ + new = annotation + for(i in 1:ncol(annotation)){ + a = annotation[, i] + b = annotation_colors[[colnames(annotation)[i]]] + if(is.character(a) | is.factor(a)){ + a = as.character(a) + if(length(setdiff(a, names(b))) > 0){ + stop(sprintf("Factor levels on variable %s do not match with annotation_colors", colnames(annotation)[i])) + } + new[, i] = b[a] + } + else{ + a = cut(a, breaks = 100) + new[, i] = colorRampPalette(b)(100)[a] + } + } + return(as.matrix(new)) + } + + draw_annotations = function(converted_annotations, border_color, gaps, fontsize, horizontal){ + n = ncol(converted_annotations) + m = nrow(converted_annotations) + + coord_x = find_coordinates(m, gaps) + + x = coord_x$coord - 0.5 * coord_x$size + +# y = cumsum(rep(fontsize, n)) - 4 + cumsum(rep(2, n)) + y = cumsum(rep(fontsize, n)) + cumsum(rep(2, n)) - fontsize / 2 + 1 + y = unit(y, "bigpts") + + if(horizontal){ + coord = expand.grid(x = x, y = y) + res = rectGrob(x = coord$x, y = coord$y, width = coord_x$size, height = unit(fontsize, "bigpts"), gp = gpar(fill = converted_annotations, col = border_color)) + } + else{ + a = x + x = unit(1, "npc") - y + y = unit(1, "npc") - a + + coord = expand.grid(y = y, x = x) + res = rectGrob(x = coord$x, y = coord$y, width = unit(fontsize, "bigpts"), height = coord_x$size, gp = gpar(fill = converted_annotations, col = border_color)) + } + + return(res) + } + + draw_annotation_names = function(annotations, fontsize, horizontal){ + n = ncol(annotations) + + x = unit(3, "bigpts") + + y = cumsum(rep(fontsize, n)) + cumsum(rep(2, n)) - fontsize / 2 + 1 + y = unit(y, "bigpts") + + if(horizontal){ + res = textGrob(colnames(annotations), x = x, y = y, hjust = 0, gp = gpar(fontsize = fontsize, fontface = 2)) + } + else{ + a = x + x = unit(1, "npc") - y + y = unit(1, "npc") - a + + res = textGrob(colnames(annotations), x = x, y = y, vjust = 0.5, hjust = 0, rot = 270, gp = gpar(fontsize = fontsize, fontface = 2)) + } + + return(res) + } + + draw_annotation_legend = function(annotation, annotation_colors, border_color, ...){ + y = unit(1, "npc") + text_height = unit(1, "grobheight", textGrob("FGH", gp = gpar(...))) + + res = gList() + for(i in names(annotation)){ + res[[i]] = textGrob(i, x = 0, y = y, vjust = 1, hjust = 0, gp = gpar(fontface = "bold", ...)) + + y = y - 1.5 * text_height + if(is.character(annotation[[i]]) | is.factor(annotation[[i]])){ + n = length(annotation_colors[[i]]) + yy = y - (1:n - 1) * 2 * text_height + + res[[paste(i, "r")]] = rectGrob(x = unit(0, "npc"), y = yy, hjust = 0, vjust = 1, height = 2 * text_height, width = 2 * text_height, gp = gpar(col = border_color, fill = annotation_colors[[i]])) + res[[paste(i, "t")]] = textGrob(names(annotation_colors[[i]]), x = text_height * 2.4, y = yy - text_height, hjust = 0, vjust = 0.5, gp = gpar(...)) + + y = y - n * 2 * text_height + + } + else{ + yy = y - 8 * text_height + seq(0, 1, 0.25)[-1] * 8 * text_height + h = 8 * text_height * 0.25 + + res[[paste(i, "r")]] = rectGrob(x = unit(0, "npc"), y = yy, hjust = 0, vjust = 1, height = h, width = 2 * text_height, gp = gpar(col = NA, fill = colorRampPalette(annotation_colors[[i]])(4))) + res[[paste(i, "r2")]] = rectGrob(x = unit(0, "npc"), y = y, hjust = 0, vjust = 1, height = 8 * text_height, width = 2 * text_height, gp = gpar(col = border_color)) + + txt = rev(range(grid.pretty(range(annotation[[i]], na.rm = TRUE)))) + yy = y - c(1, 7) * text_height + res[[paste(i, "t")]] = textGrob(txt, x = text_height * 2.4, y = yy, hjust = 0, vjust = 0.5, gp = gpar(...)) + y = y - 8 * text_height + } + y = y - 1.5 * text_height + } + + res = gTree(children = res) + + return(res) + } + + draw_main = function(text, ...){ + res = textGrob(text, gp = gpar(fontface = "bold", ...)) + + return(res) + } + + vplayout = function(x, y){ + return(viewport(layout.pos.row = x, layout.pos.col = y)) + } + + heatmap_motor = function(matrix, border_color, cellwidth, cellheight, tree_col, tree_row, treeheight_col, treeheight_row, filename, width, height, breaks, color, legend, annotation_row, annotation_col, annotation_colors, annotation_legend, main, fontsize, fontsize_row, fontsize_col, fmat, fontsize_number, number_color, gaps_col, gaps_row, labels_row, labels_col, legend.cex, txt.stats, ...){ +# Set layout + lo = lo(coln = labels_col, rown = labels_row, nrow = nrow(matrix), ncol = ncol(matrix), cellwidth = cellwidth, cellheight = cellheight, treeheight_col = treeheight_col, treeheight_row = treeheight_row, legend = legend, annotation_col = annotation_col, annotation_row = annotation_row, annotation_colors = annotation_colors, annotation_legend = annotation_legend, main = main, fontsize = fontsize, fontsize_row = fontsize_row, fontsize_col = fontsize_col, gaps_row = gaps_row, gaps_col = gaps_col, ...) + + res = lo$gt + mindim = lo$mindim + + if(!is.na(filename)){ + if(is.na(height)){ + height = convertHeight(gtable_height(res), "inches", valueOnly = T) + } + if(is.na(width)){ + width = convertWidth(gtable_width(res), "inches", valueOnly = T) + } + +# Get file type + r = regexpr("\\.[a-zA-Z]*$", filename) + if(r == -1) stop("Improper filename") + ending = substr(filename, r + 1, r + attr(r, "match.length")) + + f = switch(ending, + pdf = function(x, ...) pdf(x, ...), + png = function(x, ...) png(x, units = "in", res = 300, ...), + jpeg = function(x, ...) jpeg(x, units = "in", res = 300, ...), + jpg = function(x, ...) jpeg(x, units = "in", res = 300, ...), + tiff = function(x, ...) tiff(x, units = "in", res = 300, compression = "lzw", ...), + bmp = function(x, ...) bmp(x, units = "in", res = 300, ...), + stop("File type should be: pdf, png, bmp, jpg, tiff") + ) + +# print(sprintf("height:%f width:%f", height, width)) + +# gt = heatmap_motor(matrix, cellwidth = cellwidth, cellheight = cellheight, border_color = border_color, tree_col = tree_col, tree_row = tree_row, treeheight_col = treeheight_col, treeheight_row = treeheight_row, breaks = breaks, color = color, legend = legend, annotation_col = annotation_col, annotation_row = annotation_row, annotation_colors = annotation_colors, annotation_legend = annotation_legend, filename = NA, main = main, fontsize = fontsize, fontsize_row = fontsize_row, fontsize_col = fontsize_col, fmat = fmat, fontsize_number = fontsize_number, number_color = number_color, labels_row = labels_row, labels_col = labels_col, gaps_col = gaps_col, gaps_row = gaps_row, ...) + + f(filename, height = height, width = width) + gt = heatmap_motor(matrix, cellwidth = cellwidth, cellheight = cellheight, border_color = border_color, tree_col = tree_col, tree_row = tree_row, treeheight_col = treeheight_col, treeheight_row = treeheight_row, breaks = breaks, color = color, legend = legend, annotation_col = annotation_col, annotation_row = annotation_row, annotation_colors = annotation_colors, annotation_legend = annotation_legend, filename = NA, main = main, fontsize = fontsize, fontsize_row = fontsize_row, fontsize_col = fontsize_col, fmat = fmat, fontsize_number = fontsize_number, number_color = number_color, labels_row = labels_row, labels_col = labels_col, gaps_col = gaps_col, gaps_row = gaps_row, legend.cex = legend.cex, txt.stats = txt.stats, ...) + grid.draw(gt) + dev.off() + + return(gt) + } + +# Omit border color if cell size is too small + if(mindim < 3) border_color = NA + +# Draw title + if(!is.na(main)){ + elem = draw_main(main, fontsize = 1.3 * fontsize, ...) + res = gtable_add_grob(res, elem, t = 1, l = 3, name = "main") + } + +# Draw tree for the columns + if(!is.na(tree_col[[1]][1]) & treeheight_col != 0){ + elem = draw_dendrogram(tree_col, gaps_col, horizontal = T) + res = gtable_add_grob(res, elem, t = 2, l = 3, name = "col_tree") + } + +# Draw tree for the rows + if(!is.na(tree_row[[1]][1]) & treeheight_row != 0){ + elem = draw_dendrogram(tree_row, gaps_row, horizontal = F) + res = gtable_add_grob(res, elem, t = 4, l = 1, name = "row_tree") + } + +# Draw matrix + elem = draw_matrix(matrix, border_color, gaps_row, gaps_col, fmat, fontsize_number, number_color) + res = gtable_add_grob(res, elem, t = 4, l = 3, clip = "off", name = "matrix") + + +# Draw colnames + if(length(labels_col) != 0){ + pars = list(labels_col, gaps = gaps_col, fontsize = fontsize_col, ...) + elem = do.call(draw_colnames, pars) + res = gtable_add_grob(res, elem, t = 5, l = 3, clip = "off", name = "col_names") + } + +# Giulio +#res = gtable_add_grob( +# res, elem, t = 6, l = 3, clip = "off", name = "giulio" +# ) + +# Draw rownames + if(length(labels_row) != 0){ + pars = list(labels_row, gaps = gaps_row, fontsize = fontsize_row, ...) + elem = do.call(draw_rownames, pars) + res = gtable_add_grob(res, elem, t = 4, l = 4, clip = "off", name = "row_names") + } + +# Draw annotation tracks on cols + if(!is.na(annotation_col[[1]][1])){ +# Draw tracks + converted_annotation = convert_annotations(annotation_col, annotation_colors) + elem = draw_annotations(converted_annotation, border_color, gaps_col, fontsize, horizontal = T) + res = gtable_add_grob(res, elem, t = 3, l = 3, clip = "off", name = "col_annotation") + +# Draw names + elem = draw_annotation_names(annotation_col, fontsize, horizontal = T) + res = gtable_add_grob(res, elem, t = 3, l = 4, clip = "off", name = "row_annotation_names") + + } + +# Draw annotation tracks on rows + if(!is.na(annotation_row[[1]][1])){ +# Draw tracks + converted_annotation = convert_annotations(annotation_row, annotation_colors) + elem = draw_annotations(converted_annotation, border_color, gaps_row, fontsize, horizontal = F) + res = gtable_add_grob(res, elem, t = 4, l = 2, clip = "off", name = "row_annotation") + +# Draw names + elem = draw_annotation_names(annotation_row, fontsize, horizontal = F) + res = gtable_add_grob(res, elem, t = 5, l = 2, clip = "off", name = "row_annotation_names") + } + +# Draw annotation legend + annotation = c(annotation_col[length(annotation_col):1], annotation_row[length(annotation_row):1]) + annotation = annotation[unlist(lapply(annotation, function(x) !is.na(x[1])))] + + if(length(annotation) > 0 & annotation_legend){ + elem = draw_annotation_legend(annotation, annotation_colors, border_color, fontsize = fontsize, ...) + + t = ifelse(is.null(labels_row), 4, 3) + res = gtable_add_grob(res, elem, t = t, l = 6, b = 5, clip = "off", name = "annotation_legend") + } + +# Draw legend + if(!is.na(legend[1])){ + elem = draw_legend(color, breaks, txt.stats, legend, legend.cex = legend.cex, fontsize = fontsize, ...) + + t = ifelse(is.null(labels_row), 4, 3) + res = gtable_add_grob(res, elem, t = t, l = 5, b= 5, clip = "off", name = "legend") + } + + return(res) + } + + generate_breaks = function(x, n, center = F){ + if(center){ + m = max(abs(c(min(x, na.rm = T), max(x, na.rm = T)))) + res = seq(-m, m, length.out = n + 1) + } + else{ + res = seq(min(x, na.rm = T), max(x, na.rm = T), length.out = n + 1) + } + + return(res) + } + + scale_vec_colours = function(x, col = rainbow(10), breaks = NA){ + return(col[as.numeric(cut(x, breaks = breaks, include.lowest = T))]) + } + + scale_colours = function(mat, col = rainbow(10), breaks = NA){ + mat = as.matrix(mat) + return(matrix(scale_vec_colours(as.vector(mat), col = col, breaks = breaks), nrow(mat), ncol(mat), dimnames = list(rownames(mat), colnames(mat)))) + } + + cluster_mat = function(mat, distance, method){ + if(!(method %in% c("ward.D2", "ward", "single", "complete", "average", "mcquitty", "median", "centroid"))){ + stop("clustering method has to one form the list: 'ward', 'ward.D2', 'single', 'complete', 'average', 'mcquitty', 'median' or 'centroid'.") + } + if(!(distance[1] %in% c("correlation", "euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski")) & class(distance) != "dist"){ + stop("distance has to be a dissimilarity structure as produced by dist or one measure form the list: 'correlation', 'euclidean', 'maximum', 'manhattan', 'canberra', 'binary', 'minkowski'") + } + if(distance[1] == "correlation"){ + d = as.dist(1 - cor(t(mat))) + } + else{ + if(class(distance) == "dist"){ + d = distance + } + else{ + d = dist(mat, method = distance) + } + } + + return(hclust(d, method = method)) + } + + scale_rows = function(x){ + m = apply(x, 1, mean, na.rm = T) + s = apply(x, 1, sd, na.rm = T) + return((x - m) / s) + } + + scale_mat = function(mat, scale){ + if(!(scale %in% c("none", "row", "column"))){ + stop("scale argument shoud take values: 'none', 'row' or 'column'") + } + mat = switch(scale, none = mat, row = scale_rows(mat), column = t(scale_rows(t(mat)))) + return(mat) + } + + generate_annotation_colours = function(annotation, annotation_colors, drop){ + if(is.na(annotation_colors)[[1]][1]){ + annotation_colors = list() + } + count = 0 + for(i in 1:length(annotation)){ + if(is.character(annotation[[i]]) | is.factor(annotation[[i]])){ + if (is.factor(annotation[[i]]) & !drop){ + count = count + length(levels(annotation[[i]])) + } + else{ + count = count + length(unique(annotation[[i]])) + } + } + } + + factor_colors = dscale(factor(1:count), hue_pal(l = 75)) + + set.seed(3453) + + cont_counter = 2 + for(i in 1:length(annotation)){ + if(!(names(annotation)[i] %in% names(annotation_colors))){ + if(is.character(annotation[[i]]) | is.factor(annotation[[i]])){ + n = length(unique(annotation[[i]])) + if (is.factor(annotation[[i]]) & !drop){ + n = length(levels(annotation[[i]])) + } + ind = sample(1:length(factor_colors), n) + annotation_colors[[names(annotation)[i]]] = factor_colors[ind] + l = levels(as.factor(annotation[[i]])) + l = l[l %in% unique(annotation[[i]])] + if (is.factor(annotation[[i]]) & !drop){ + l = levels(annotation[[i]]) + } + + names(annotation_colors[[names(annotation)[i]]]) = l + factor_colors = factor_colors[-ind] + } + else{ + annotation_colors[[names(annotation)[i]]] = brewer_pal("seq", cont_counter)(5)[1:4] + cont_counter = cont_counter + 1 + } + } + } + return(annotation_colors) + } + + kmeans_pheatmap = function(mat, k = min(nrow(mat), 150), sd_limit = NA, ...){ +# Filter data + if(!is.na(sd_limit)){ + s = apply(mat, 1, sd) + mat = mat[s > sd_limit, ] + } + +# Cluster data + set.seed(1245678) + km = kmeans(mat, k, iter.max = 100) + mat2 = km$centers + +# Compose rownames + t = table(km$cluster) + rownames(mat2) = sprintf("cl%s_size_%d", names(t), t) + +# Draw heatmap + pheatmap(mat2, ...) + } + + find_gaps = function(tree, cutree_n){ + v = cutree(tree, cutree_n)[tree$order] + gaps = which((v[-1] - v[-length(v)]) != 0) + + } + +#' A function to draw clustered heatmaps. +#' +#' A function to draw clustered heatmaps where one has better control over some graphical +#' parameters such as cell size, etc. +#' +#' The function also allows to aggregate the rows using kmeans clustering. This is +#' advisable if number of rows is so big that R cannot handle their hierarchical +#' clustering anymore, roughly more than 1000. Instead of showing all the rows +#' separately one can cluster the rows in advance and show only the cluster centers. +#' The number of clusters can be tuned with parameter kmeans_k. +#' +#' @param mat numeric matrix of the values to be plotted. +#' @param color vector of colors used in heatmap. +#' @param kmeans_k the number of kmeans clusters to make, if we want to agggregate the +#' rows before drawing heatmap. If NA then the rows are not aggregated. +#' @param breaks a sequence of numbers that covers the range of values in mat and is one +#' element longer than color vector. Used for mapping values to colors. Useful, if needed +#' to map certain values to certain colors, to certain values. If value is NA then the +#' breaks are calculated automatically. +#' @param border_color color of cell borders on heatmap, use NA if no border should be +#' drawn. +#' @param cellwidth individual cell width in points. If left as NA, then the values +#' depend on the size of plotting window. +#' @param cellheight individual cell height in points. If left as NA, +#' then the values depend on the size of plotting window. +#' @param scale character indicating if the values should be centered and scaled in +#' either the row direction or the column direction, or none. Corresponding values are +#' \code{"row"}, \code{"column"} and \code{"none"} +#' @param cluster_rows boolean values determining if rows should be clustered, +#' @param cluster_cols boolean values determining if columns should be clustered. +#' @param clustering_distance_rows distance measure used in clustering rows. Possible +#' values are \code{"correlation"} for Pearson correlation and all the distances +#' supported by \code{\link{dist}}, such as \code{"euclidean"}, etc. If the value is none +#' of the above it is assumed that a distance matrix is provided. +#' @param clustering_distance_cols distance measure used in clustering columns. Possible +#' values the same as for clustering_distance_rows. +#' @param clustering_method clustering method used. Accepts the same values as +#' \code{\link{hclust}}. +#' @param cutree_rows number of clusters the rows are divided into, based on the +#' hierarchical clustering (using cutree), if rows are not clustered, the +#' argument is ignored +#' @param cutree_cols similar to \code{cutree_rows}, but for columns +#' @param treeheight_row the height of a tree for rows, if these are clustered. +#' Default value 50 points. +#' @param treeheight_col the height of a tree for columns, if these are clustered. +#' Default value 50 points. +#' @param legend logical to determine if legend should be drawn or not. +#' @param legend_breaks vector of breakpoints for the legend. +#' @param legend_labels vector of labels for the \code{legend_breaks}. +#' @param annotation_row data frame that specifies the annotations shown on left +#' side of the heatmap. Each row defines the features for a specific row. The +#' rows in the data and in the annotation are matched using corresponding row +#' names. Note that color schemes takes into account if variable is continuous +#' or discrete. +#' @param annotation_col similar to annotation_row, but for columns. +#' @param annotation deprecated parameter that currently sets the annotation_col if it is missing +#' @param annotation_colors list for specifying annotation_row and +#' annotation_col track colors manually. It is possible to define the colors +#' for only some of the features. Check examples for details. +#' @param annotation_legend boolean value showing if the legend for annotation +#' tracks should be drawn. +#' @param drop_levels logical to determine if unused levels are also shown in +#' the legend +#' @param show_rownames boolean specifying if column names are be shown. +#' @param show_colnames boolean specifying if column names are be shown. +#' @param main the title of the plot +#' @param fontsize base fontsize for the plot +#' @param legend.cex Default 0.5; determines legend size if \code{legend = TRUE} +#' @param fontsize_row fontsize for rownames (Default: fontsize) +#' @param fontsize_col fontsize for colnames (Default: fontsize) +#' @param display_numbers logical determining if the numeric values are also printed to +#' the cells. If this is a matrix (with same dimensions as original matrix), the contents +#' of the matrix are shown instead of original values. +#' @param number_format format strings (C printf style) of the numbers shown in cells. +#' For example "\code{\%.2f}" shows 2 decimal places and "\code{\%.1e}" shows exponential +#' notation (see more in \code{\link{sprintf}}). +#' @param number_color color of the text +#' @param fontsize_number fontsize of the numbers displayed in cells +#' @param gaps_row vector of row indices that show shere to put gaps into +#' heatmap. Used only if the rows are not clustered. See \code{cutree_row} +#' to see how to introduce gaps to clustered rows. +#' @param gaps_col similar to gaps_row, but for columns. +#' @param labels_row custom labels for rows that are used instead of rownames. +#' @param labels_col similar to labels_row, but for columns. +#' @param filename file path where to save the picture. Filetype is decided by +#' the extension in the path. Currently following formats are supported: png, pdf, tiff, +#' bmp, jpeg. Even if the plot does not fit into the plotting window, the file size is +#' calculated so that the plot would fit there, unless specified otherwise. +#' @param width manual option for determining the output file width in inches. +#' @param height manual option for determining the output file height in inches. +#' @param silent do not draw the plot (useful when using the gtable output) +#' @param txt.stats By default, shows a summary statistics for shown data (n,m, |G| and |P|) +#' @param \dots graphical parameters for the text used in plot. Parameters passed to +#' \code{\link{grid.text}}, see \code{\link{gpar}}. +#' +#' @return +#' Invisibly a list of components +#' \itemize{ +#' \item \code{tree_row} the clustering of rows as \code{\link{hclust}} object +#' \item \code{tree_col} the clustering of columns as \code{\link{hclust}} object +#' \item \code{kmeans} the kmeans clustering of rows if parameter \code{kmeans_k} was +#' specified +#' } +#' +#' @author Raivo Kolde +#' @examples +#' # Create test matrix +#' test = matrix(rnorm(200), 20, 10) +#' test[1:10, seq(1, 10, 2)] = test[1:10, seq(1, 10, 2)] + 3 +#' test[11:20, seq(2, 10, 2)] = test[11:20, seq(2, 10, 2)] + 2 +#' test[15:20, seq(2, 10, 2)] = test[15:20, seq(2, 10, 2)] + 4 +#' colnames(test) = paste("Test", 1:10, sep = "") +#' rownames(test) = paste("Gene", 1:20, sep = "") +#' +#' # Draw heatmaps +#' pheatmap(test) +#' pheatmap(test, kmeans_k = 2) +#' pheatmap(test, scale = "row", clustering_distance_rows = "correlation") +#' pheatmap(test, color = colorRampPalette(c("navy", "white", "firebrick3"))(50)) +#' pheatmap(test, cluster_row = FALSE) +#' pheatmap(test, legend = FALSE) +#' +#' # Show text within cells +#' pheatmap(test, display_numbers = TRUE) +#' pheatmap(test, display_numbers = TRUE, number_format = "\%.1e") +#' pheatmap(test, display_numbers = matrix(ifelse(test > 5, "*", ""), nrow(test))) +#' pheatmap(test, cluster_row = FALSE, legend_breaks = -1:4, legend_labels = c("0", +#' "1e-4", "1e-3", "1e-2", "1e-1", "1")) +#' +#' # Fix cell sizes and save to file with correct size +#' pheatmap(test, cellwidth = 15, cellheight = 12, main = "Example heatmap") +#' pheatmap(test, cellwidth = 15, cellheight = 12, fontsize = 8, filename = "test.pdf") +#' +#' # Generate annotations for rows and columns +#' annotation_col = data.frame( +#' CellType = factor(rep(c("CT1", "CT2"), 5)), +#' Time = 1:5 +#' ) +#' rownames(annotation_col) = paste("Test", 1:10, sep = "") +#' +#' annotation_row = data.frame( +#' GeneClass = factor(rep(c("Path1", "Path2", "Path3"), c(10, 4, 6))) +#' ) +#' rownames(annotation_row) = paste("Gene", 1:20, sep = "") +#' +#' # Display row and color annotations +#' pheatmap(test, annotation_col = annotation_col) +#' pheatmap(test, annotation_col = annotation_col, annotation_legend = FALSE) +#' pheatmap(test, annotation_col = annotation_col, annotation_row = annotation_row) +#' +#' +#' # Specify colors +#' ann_colors = list( +#' Time = c("white", "firebrick"), +#' CellType = c(CT1 = "#1B9E77", CT2 = "#D95F02"), +#' GeneClass = c(Path1 = "#7570B3", Path2 = "#E7298A", Path3 = "#66A61E") +#' ) +#' +#' pheatmap(test, annotation_col = annotation_col, annotation_colors = ann_colors, main = "Title") +#' pheatmap(test, annotation_col = annotation_col, annotation_row = annotation_row, +#' annotation_colors = ann_colors) +#' pheatmap(test, annotation_col = annotation_col, annotation_colors = ann_colors[2]) +#' +#' # Gaps in heatmaps +#' pheatmap(test, annotation_col = annotation_col, cluster_rows = FALSE, gaps_row = c(10, 14)) +#' pheatmap(test, annotation_col = annotation_col, cluster_rows = FALSE, gaps_row = c(10, 14), +#' cutree_col = 2) +#' +#' # Show custom strings as row/col names +#' labels_row = c("", "", "", "", "", "", "", "", "", "", "", "", "", "", "", +#' "", "", "Il10", "Il15", "Il1b") +#' +#' pheatmap(test, annotation_col = annotation_col, labels_row = labels_row) +#' +#' # Specifying clustering from distance matrix +#' drows = dist(test, method = "minkowski") +#' dcols = dist(t(test), method = "minkowski") +#' pheatmap(test, clustering_distance_rows = drows, clustering_distance_cols = dcols) +#' @export pheatmap +#' @importFrom grid unit textGrob gpar unit.c viewport convertWidth +#' @importFrom grid convertHeight gList gTree rectGrob grobTree polylineGrob +#' @importFrom grid grid.draw grid.pretty grid.newpage +#' @importFrom scales dscale hue_pal brewer_pal +#' @importFrom RColorBrewer brewer.pal +pheatmap = function(mat, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100), kmeans_k = NA, breaks = NA, border_color = "grey60", cellwidth = NA, cellheight = NA, scale = "none", cluster_rows = TRUE, cluster_cols = TRUE, clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean", clustering_method = "complete", cutree_rows = NA, cutree_cols = NA, treeheight_row = ifelse(cluster_rows, 50, 0), treeheight_col = ifelse(cluster_cols, 50, 0), legend = TRUE, legend_breaks = NA, legend_labels = NA, annotation_row = NA, annotation_col = NA, annotation = NA, annotation_colors = NA, annotation_legend = TRUE, drop_levels = TRUE, show_rownames = T, show_colnames = T, main = NA, fontsize = 10, fontsize_row = fontsize, fontsize_col = fontsize, display_numbers = F, number_format = "%.2f", number_color = "grey30", fontsize_number = 0.8 * fontsize, gaps_row = NULL, gaps_col = NULL, labels_row = NULL, labels_col = NULL, filename = NA, width = NA, height = NA, silent = FALSE, legend.cex = 1, txt.stats = NA, ...){ + +# Set labels + if(is.null(labels_row)){ + labels_row = rownames(mat) + } + if(is.null(labels_col)){ + labels_col = colnames(mat) + } + +# Preprocess matrix + mat = as.matrix(mat) + if(scale != "none"){ + mat = scale_mat(mat, scale) + if(is.na(breaks)){ + breaks = generate_breaks(mat, length(color), center = T) + } + } + + +# Kmeans + if(!is.na(kmeans_k)){ +# Cluster data + km = kmeans(mat, kmeans_k, iter.max = 100) + mat = km$centers + +# Compose rownames + t = table(km$cluster) + labels_row = sprintf("Cluster: %s Size: %d", names(t), t) + } + else{ + km = NA + } + +# Format numbers to be displayed in cells + if(is.matrix(display_numbers) | is.data.frame(display_numbers)){ + if(nrow(display_numbers) != nrow(mat) | ncol(display_numbers) != ncol(mat)){ + stop("If display_numbers provided as matrix, its dimensions have to match with mat") + } + + display_numbers = as.matrix(display_numbers) + fmat = matrix(as.character(display_numbers), nrow = nrow(display_numbers), ncol = ncol(display_numbers)) + fmat_draw = TRUE + + } + else{ + if(display_numbers){ + fmat = matrix(sprintf(number_format, mat), nrow = nrow(mat), ncol = ncol(mat)) + fmat_draw = TRUE + } + else{ + fmat = matrix(NA, nrow = nrow(mat), ncol = ncol(mat)) + fmat_draw = FALSE + } + } + +# Do clustering + if(cluster_rows){ + tree_row = cluster_mat(mat, distance = clustering_distance_rows, method = clustering_method) + mat = mat[tree_row$order, , drop = FALSE] + fmat = fmat[tree_row$order, , drop = FALSE] + labels_row = labels_row[tree_row$order] + if(!is.na(cutree_rows)){ + gaps_row = find_gaps(tree_row, cutree_rows) + } + else{ + gaps_row = NULL + } + } + else{ + tree_row = NA + treeheight_row = 0 + } + + if(cluster_cols){ + tree_col = cluster_mat(t(mat), distance = clustering_distance_cols, method = clustering_method) + mat = mat[, tree_col$order, drop = FALSE] + fmat = fmat[, tree_col$order, drop = FALSE] + labels_col = labels_col[tree_col$order] + if(!is.na(cutree_cols)){ + gaps_col = find_gaps(tree_col, cutree_cols) + } + else{ + gaps_col = NULL + } + } + else{ + tree_col = NA + treeheight_col = 0 + } + + attr(fmat, "draw") = fmat_draw + +# Colors and scales + if(!is.na(legend_breaks[1]) & !is.na(legend_labels[1])){ + if(length(legend_breaks) != length(legend_labels)){ + stop("Lengths of legend_breaks and legend_labels must be the same") + } + } + + + if(is.na(breaks[1])){ + breaks = generate_breaks(as.vector(mat), length(color)) + } + if (legend & is.na(legend_breaks[1])) { + legend = grid.pretty(range(as.vector(breaks))) + names(legend) = legend + } + else if(legend & !is.na(legend_breaks[1])){ + legend = legend_breaks[legend_breaks >= min(breaks) & legend_breaks <= max(breaks)] + + if(!is.na(legend_labels[1])){ + legend_labels = legend_labels[legend_breaks >= min(breaks) & legend_breaks <= max(breaks)] + names(legend) = legend_labels + } + else{ + names(legend) = legend + } + } + else { + legend = NA + } + mat = scale_colours(mat, col = color, breaks = breaks) + +# Preparing annotations + if(is.na(annotation_col[[1]][1]) & !is.na(annotation[[1]][1])){ + annotation_col = annotation + } +# Select only the ones present in the matrix + if(!is.na(annotation_col[[1]][1])){ + annotation_col = annotation_col[colnames(mat), , drop = F] + } + + if(!is.na(annotation_row[[1]][1])){ + annotation_row = annotation_row[rownames(mat), , drop = F] + } + + annotation = c(annotation_row, annotation_col) + annotation = annotation[unlist(lapply(annotation, function(x) !is.na(x[1])))] + if(length(annotation) != 0){ + annotation_colors = generate_annotation_colours(annotation, annotation_colors, drop = drop_levels) + } + else{ + annotation_colors = NA + } + + if(!show_rownames){ + labels_row = NULL + } + + if(!show_colnames){ + labels_col = NULL + } + +# Draw heatmap + gt = heatmap_motor(mat, border_color = border_color, cellwidth = cellwidth, cellheight = cellheight, treeheight_col = treeheight_col, treeheight_row = treeheight_row, tree_col = tree_col, tree_row = tree_row, filename = filename, width = width, height = height, breaks = breaks, color = color, legend = legend, annotation_row = annotation_row, annotation_col = annotation_col, annotation_colors = annotation_colors, annotation_legend = annotation_legend, main = main, fontsize = fontsize, fontsize_row = fontsize_row, fontsize_col = fontsize_col, fmat = fmat, fontsize_number = fontsize_number, number_color = number_color, gaps_row = gaps_row, gaps_col = gaps_col, labels_row = labels_row, labels_col = labels_col, legend.cex = legend.cex, txt.stats = txt.stats, ...) + +#gtsub = heatmap_motor(mat, border_color = border_color, cellwidth = cellwidth, cellheight = cellheight, treeheight_col = treeheight_col, treeheight_row = treeheight_row, tree_col = tree_col, tree_row = tree_row, filename = filename, width = width, height = height, breaks = breaks, color = color, legend = legend, annotation_row = annotation_row, annotation_col = annotation_col, annotation_colors = annotation_colors, annotation_legend = annotation_legend, main = main, fontsize = fontsize, fontsize_row = fontsize_row, fontsize_col = fontsize_col, fmat = fmat, fontsize_number = fontsize_number, number_color = number_color, gaps_row = gaps_row, gaps_col = gaps_col, labels_row = labels_row, labels_col = labels_col, legend.cex = legend.cex, txt.stats = txt.stats, ...) + +#gt = rbind(gt, gtsub, size='max') + + if(is.na(filename) & !silent){ + grid.newpage() + grid.draw(gt) + } + + invisible(list(tree_row = tree_row, tree_col = tree_col, kmeans = km, gtable = gt)) + } + + +# @importFrom circlize circos.clear circos.par chordDiagram +# @importFrom circlize circos.trackPlotRegion circos.text + pattern.plot = function(x, + group, + to, + gap.cex = 1.0, + legend.cex = 1.0, + label.cex = 1.0, + title=paste(to[1], to[2]), + mode = 'barplot') + { +# keys + events = as.events(x) + + keys = NULL + for(i in 1:nrow(group)) + keys = c(keys, rownames(as.events(x, genes=group[i, 'event'], types=group[i, 'type']))) + + cat('Group:\n') + events.names = events[keys, , drop = FALSE] + print(events.names) + + + cat('Group tested against:', to[1], to[2], '\n') + +# HARD exclusivity: 1 for each pattern element +# CO-OCCURRENCE: 1 +# SOFT exclusivity: 1 +# OTHERS: 1 + matrix = matrix(0, nrow = length(keys) + 3, ncol = 1) + rownames(matrix) = c(keys, 'soft', 'co-occurrence', 'other') +# colnames(matrix) = paste(to, collapse=':') + colnames(matrix) = to[1] + + to.samples = which.samples(x, gene=to[1], type=to[2]) + cat('Pattern conditioned to samples:', paste(to.samples, collapse=', '), '\n') + + pattern.samples = list() + hard.pattern.samples = list() + for(i in 1:length(keys)) + { +# Samples + samples = which.samples(x, gene= events.names[i, 'event'], type= events.names[i, 'type']) + +# Pattern samples + pattern.samples = append(pattern.samples, list(samples)) + +# Other negative samples + negative.samples = list() + for(j in 1:length(keys)) + if(i != j) + negative.samples = append(negative.samples, + list(which.samples(x, gene= events.names[j, 'event'], type= events.names[j, 'type'], neg = TRUE))) + + pattern.negative = Reduce(intersect, negative.samples) + hard.pattern.samples = append(hard.pattern.samples, list(intersect(samples, pattern.negative))) + } + names(pattern.samples) = keys + names(hard.pattern.samples) = keys + +# print(hard.pattern.samples) + +# print('tutti:') +# print(unlist(pattern.samples)) + +# CO-OCCURRENCES + pattern.co.occurrences = Reduce(intersect, pattern.samples) + co.occurrences = intersect(to.samples, pattern.co.occurrences) + matrix['co-occurrence', ] = length(co.occurrences) + cat('Co-occurrence in #samples: ', matrix['co-occurrence', ], '\n') + +# HARD EXCLUSIVITY + for(i in 1:length(keys)) + matrix[keys[i], ] = length(intersect(to.samples, hard.pattern.samples[[keys[i]]])) + cat('Hard exclusivity in #samples:', matrix[keys, ], '\n') + +# OTHER + union = unique(unlist(pattern.samples)) + union = setdiff(as.samples(x), union) + matrix['other', ] = length(intersect(to.samples, union)) + cat('Other observations in #samples:', matrix['other', ], '\n') + +# SOFT EXCLUSIVITY + matrix['soft', ] = length(to.samples) - colSums(matrix) + cat('Soft exclusivity in #samples:', matrix['soft', ], '\n') + +# Choose colors according to event type + sector.color = rep('gray', nrow(matrix) + 1) + link.color = rep('gray', nrow(matrix)) + + names(sector.color) = c(rownames(matrix), colnames(matrix)) + names(link.color) = rownames(matrix) + + add.alpha <- function(col, alpha=1) + { + if(missing(col)) stop("Please provide a vector of colours.") + apply(sapply(col, col2rgb)/255, 2, function(x) rgb(x[1], x[2], x[3], alpha=alpha)) + } + +# Link colors - event types + for(i in 1:length(keys)) + link.color[keys[i]] = as.colors(x)[events.names[keys[i], 'type' ]] + +# print(link.color) + + link.color['soft'] = 'orange' + link.color['co-occurrence'] = 'darkgreen' + + + idx.max = which(matrix == max(matrix)) + link.style = matrix(0, nrow=nrow(matrix), ncol=ncol(matrix)) + rownames(link.style) = rownames(matrix) + colnames(link.style) = colnames(matrix) + link.style[idx.max, 1] = 5 +# print(link.style) + +# link.color[idx.max] = add.alpha(link.color[idx.max], .3) +# print(link.color) + + +# Sector colors - event types + sector.color[1:length(keys)] = 'red' # XOR + sector.color['soft'] = 'orange' # OR + sector.color['co-occurrence'] = 'darkgreen' # AND + sector.color[colnames(matrix)] = as.colors(x)[as.events(x, genes = to[1], types=to[2])[, 'type' ]] + +# print(link.color) +# print(sector.color) + +# Informative labels + for(i in 1:length(keys)) + { +# rownames(matrix)[i] = paste(paste(rep(' ', i), collapse=''), events.names[i, 'event' ]) + if(nevents(x, genes = events.names[i, 'event' ]) > 1) + rownames(matrix)[i] = paste(paste(rep(' ', i), collapse=''), events.names[i, 'event' ]) + else rownames(matrix)[i] = events.names[i, 'event' ] + + names(sector.color)[i] = rownames(matrix)[i] + } + + + if( mode == 'circos') + { + + cat('Circlize matrix.\n') + print(matrix) + + circos.clear() + + gaps = c(rep(2, length(keys) - 2), rep(15 * gap.cex, 4), rep(40 * gap.cex, 2)) + + circos.par(gap.degree = gaps) + +# matrix = matrix[ order(matrix[, 1]) , , drop = FALSE] +# matrix = matrix[ order(matrix[, 1], decreasing = T) , , drop = FALSE] +# print(matrix) + + chordDiagram(matrix, + grid.col = sector.color, + annotationTrack = "grid", + preAllocateTracks = list(track.height = 0.3), + row.col = link.color, + link.border = 'black', + link.lty = link.style, + link.lwd = 0.3 + ) + +# for(si in get.all.sector.index()) +# { +# # here the index for the grid track is 2 +# circos.axis(h = "top", labels.cex = 0.3, major.tick.percentage = .4, sector.index = si, track.index = 2) +# } + + circos.trackPlotRegion( + track.index = 1, + panel.fun = function(x, y) + { + xlim = get.cell.meta.data("xlim") + ylim = get.cell.meta.data("ylim") + sector.name = get.cell.meta.data("sector.index") + circos.text(mean(xlim), cex = 1.0 * label.cex, ylim[1], sector.name, facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.5)) + }, + bg.border = NA) + + } + else # barplot + { + layout(matrix(c(1,2,3,3), ncol=2, byrow=TRUE), heights=c(4, 1)) + par(mai=rep(0.5, 4)) + + basic.unit = 2 + widths = c( + rep(basic.unit, length(keys)), + rep(basic.unit * 2, 3) + ) + + spaces = c( + rep(0, length(keys)), + 3, + rep(.5, 2) + ) + + print(matrix) + +# barplot(matrix[, 1], widths, space = 0) + + rownames(matrix)[length(keys) + 1] = '2 or more\n(soft-exclusivity)' + rownames(matrix)[length(keys) + 2] = 'all together\n(co-occurrence)' + + rownames(matrix)[nrow(matrix)] = 'none of the\nevents' + + + summary = matrix[ (length(keys) + 1):nrow(matrix), 1, drop = FALSE] + summary = rbind( sum(matrix[1:length(keys),]), summary) + rownames(summary) = NULL + colnames(summary) = paste(to[1], to[2]) + print(summary) + +## Plot, but suppress the labels + + + midpts <- barplot( + summary, + 5, + col = c('red', 'orange', 'darkgreen', 'gray'), + horiz = F, + space = 1, + las = 1, + main = paste0('Combination of group events \n in ', sum(summary),' samples with ', to[1],' ', to[2]), + cex.main = .6, + cex.names = .5, + cex.axis = .5 ) + + exclus = matrix[1:length(keys), 1, drop = FALSE] +# print(exclus) + + events.legend.pch = rep(19, length(unique(events.names[, 'type']))) + + + midpts <- barplot( + exclus[, 1], +# widths, + col = link.color[1:length(keys)], + horiz = T, + space = 1, + las = 2, + main = paste0('Observations supporting \n hard-exclusivity'), + cex.main = .6, +# xlab = 'number of observations (given KRAS)', + cex.names=.5, + cex.axis=.5 ) + + par(mai=c(0,0,0,0)) + plot.new() + + + legend("topleft", + cex = 0.6 * legend.cex, + pt.cex = 1, + title = expression(bold('Table of observations')), + horiz=F, + bty='n', + legend = c( + paste(sum(matrix[1:length(keys) ,]), 'with 1 event (hard exclusivity)'), + paste(matrix[length(keys) + 1, ], 'with 2 or more events'), + paste(matrix[length(keys) + 2, ], 'with all events (co-occurrence)'), + paste(matrix[length(keys) + 3, ], 'with no events') + ), + fill= c('red', 'orange', 'darkgreen', 'gray') + ) + + group.legend = apply(group, 1, paste, collapse=' in ') + + legend('top', + cex = .6 * legend.cex, + pt.cex = 1, + title = expression(bold('Input group')), + bty='n', + legend = group.legend + ) + + + legend(x = 'topright', + legend = unique(events.names[, 'type']), + title = expression(bold('Events type')), + bty = 'n', + inset = +.05, + cex = .6 * legend.cex, + pt.cex = 1, + pch = events.legend.pch, + col = as.colors(x)[unique(events.names[, 'type'])] +# pt.bg = pt_bg + ) + + + + + } + + } + diff --git a/R/which.functions.R b/R/which.functions.R new file mode 100644 index 00000000..3893fb6d --- /dev/null +++ b/R/which.functions.R @@ -0,0 +1,42 @@ +################################################################################## +# # +# TRONCO: a tool for TRanslational ONCOlogy # +# # +################################################################################## +# Copyright (c) 2015, Marco Antoniotti, Giulio Caravagna, Luca De Sano, # +# Alex Graudenzi, Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, # +# Giancarlo Mauri, Bud Mishra and Daniele Ramazzotti. # +# # +# All rights reserved. This program and the accompanying materials # +# are made available under the terms of the GNU GPL v3.0 # +# which accompanies this distribution # +# # +################################################################################## + +#' Return a list of samples with specified alteration +#' @title which.samples +#' +#' @examples +#' data(test_dataset) +#' which.samples(test_dataset, 'TET2', 'ins_del') +#' which.samples(test_dataset, 'TET2', 'ins_del', neg=TRUE) +#' +#' @param x A TRONCO compliant dataset. +#' @param gene A list of gene names +#' @param type A list of types +#' @param neg If FALSE return the list, if TRUE return as.samples() - list +#' @return A list of sample +#' @export which.samples +which.samples = function(x, gene, type, neg = FALSE) +{ + data = as.gene(x, genes = gene, types = type) + data = data[data == 1, , drop = FALSE] + + samples = as.samples(x) + + if(neg) { + return(setdiff(samples, rownames(data))) + } else { + return(rownames(data)) + } +} diff --git a/R/zzz.R b/R/zzz.R new file mode 100644 index 00000000..3bad733f --- /dev/null +++ b/R/zzz.R @@ -0,0 +1,14 @@ +################################################################################## +# # +# TRONCO: a tool for TRanslational ONCOlogy # +# # +################################################################################## +# Copyright (c) 2015, Marco Antoniotti, Giulio Caravagna, Luca De Sano, # +# Alex Graudenzi, Ilya Korsunsky, Mattia Longoni, Loes Olde Loohuis, # +# Giancarlo Mauri, Bud Mishra and Daniele Ramazzotti. # +# # +# All rights reserved. This program and the accompanying materials # +# are made available under the terms of the GNU GPL v3.0 # +# which accompanies this distribution # +# # +################################################################################## \ No newline at end of file diff --git a/README.md b/README.md index e95393d7..49821bb2 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,10 @@ -TRanslational ONCOlogy -====================== +TRONCO (TRanslational ONCOlogy) +=============================== -A R Library that implements some algorithms for the "cancer progression reconstruction problem". +**TRONCO** is a **R** package which collects algorithms to infer *progression models* from Bernoulli 0/1 profiles of genomic alterations across a tumor sample. -Download latest realease of TRONCO v2 [here](https://github.com/BIMIB-DISCo/TRONCO/releases) +Such profiles are usually visualised as a binary input matrix where each row represents a patient’s sample (e.g., the result of a sequenced tumor biopsy), and each column an event relevant to the progression (a certain type of somatic mutation, a focal or higher-level chromosomal copy number alteration etc.); a 0/1 value models the absence/presence of that alteration in the sample. + +In this version of **TRONCO** such profiles can be readily imported by boolean matrices and *MAF* or *GISTIC* files. The package provides various functions to editing, visualise and subset such data, as well as functions to query the Cbio portal for cancer genomics. + +This version of TRONCO comes with the parallel implementations the **CAPRESE** [*PLoS ONE 9(12): e115570*] and **CAPRI** [*Bioinformatics, doi:10.1093/bioinformatics/btv296*] algorithms to infer possible progression models arranged as trees, or general direct acyclic graphs. Bootstrap functions to assess the parametric, non-prametric and statistical confidence of every inferred model are also provided. The package comes with some data available as well, which include the dataset of *Atypical Chronic Myeloid Leukemia samples* provided by Piazza et al., Nat. Genet., 45 (2013), and examples. diff --git a/data/aCML.RData b/data/aCML.RData new file mode 100644 index 00000000..76d85ba2 Binary files /dev/null and b/data/aCML.RData differ diff --git a/data/as.events.test.RData b/data/as.events.test.RData new file mode 100644 index 00000000..c01d8829 Binary files /dev/null and b/data/as.events.test.RData differ diff --git a/data/as.genotypes.test.RData b/data/as.genotypes.test.RData new file mode 100644 index 00000000..3fa3d820 Binary files /dev/null and b/data/as.genotypes.test.RData differ diff --git a/data/as.stages.test.RData b/data/as.stages.test.RData new file mode 100644 index 00000000..d78395a4 Binary files /dev/null and b/data/as.stages.test.RData differ diff --git a/data/events.rda b/data/events.rda deleted file mode 100644 index 51d7e5e5..00000000 Binary files a/data/events.rda and /dev/null differ diff --git a/data/events.txt b/data/events.txt deleted file mode 100644 index cabd982a..00000000 --- a/data/events.txt +++ /dev/null @@ -1,7 +0,0 @@ -8q+ , gain , 1 -3q+ , gain , 2 -5q- , loss , 3 -4q- , loss , 4 -8p- , loss , 5 -1q+ , gain , 6 -Xp- , loss , 7 diff --git a/data/gistic.txt b/data/gistic.txt new file mode 100644 index 00000000..c22ad82b --- /dev/null +++ b/data/gistic.txt @@ -0,0 +1,65 @@ +Hugo_Symbol Entrez_Gene_Id TCGA-A6-2670 TCGA-A6-2671 TCGA-A6-2672 TCGA-A6-2674 TCGA-A6-2675 TCGA-A6-2676 +ACAP3 116983 -1 0 0 0 0 0 +AGRN 375790 -1 0 0 0 0 0 +ATAD3A 55210 -1 0 0 0 0 0 +ATAD3B 83858 -1 0 0 0 0 0 +ATAD3C 219293 -1 0 0 0 0 0 +AURKAIP1 54998 2 2 2 0 0 2 +B3GALT6 126792 -1 0 0 1 0 0 +C1orf159 54991 -1 0 0 0 0 0 +C1orf170 84808 -1 0 0 0 0 0 +C1orf70 339453 -1 0 0 1 0 -2 +CCNL2 81669 -1 0 0 1 0 0 +CDC2L1 984 2 0 0 1 0 0 +CDC2L2 728642 -1 0 2 0 0 0 +CPSF3L 54973 -1 0 0 0 0 0 +DVL1 1855 -1 0 0 0 0 -2 +FAM132A 388581 -1 0 -2 0 0 0 +FAM138A 645520 0 0 0 0 0 0 +FAM138C 654835 0 0 0 0 0 0 +FAM138F 641702 0 0 0 0 0 0 +FAM41C 284593 -1 0 -2 0 0 0 +FLJ39609 100130417 -1 0 0 0 0 0 +GLTPD1 80772 -1 0 0 0 0 2 +GNB1 2782 -1 0 0 0 0 2 +HES4 57801 -1 0 0 0 0 0 +ISG15 9636 -1 0 0 1 0 0 +KLHL17 339451 -1 0 0 1 0 0 +LOC100128842 100128842 -1 0 0 0 0 0 +LOC100132062 100132062 -1 0 0 0 0 0 +LOC100132287 100132287 -1 0 -1 0 0 0 +LOC100133331 100133331 -1 0 0 0 0 0 +LOC100288778 100288778 -1 0 0 0 0 0 +LOC148413 148413 -1 0 0 0 0 0 +LOC441869 441869 -1 0 0 0 0 0 +LOC643837 643837 -1 0 0 0 0 0 +LOC728661 728661 -1 0 0 0 0 0 +MIB2 142678 -1 0 0 0 0 0 +MIR200A 406983 -1 0 0 0 0 0 +MIR200B 406984 -1 0 0 0 0 0 +MIR429 554210 -1 0 0 0 0 0 +MMP23A 8511 -1 0 0 0 0 0 +MMP23B 8510 -1 0 0 0 0 0 +MRPL20 55052 -1 0 0 0 0 0 +MXRA8 54587 -1 0 0 0 0 0 +NADK 65220 -1 0 0 -1 2 0 +NCRNA00115 79854 -1 0 0 0 2 0 +NOC2L 26155 -1 0 0 0 2 1 +OR4F16 81399 -1 0 0 0 2 0 +OR4F29 729759 -1 0 0 -1 0 1 +OR4F3 26683 -1 0 -1 0 0 1 +OR4F5 79501 -1 0 0 0 0 1 +PLEKHN1 84069 -1 0 0 0 0 0 +PUSL1 126789 -1 0 0 0 0 0 +SAMD11 148398 -1 0 0 0 0 0 +SCNN1D 6339 -1 0 0 0 0 0 +SDF4 51150 -1 0 0 0 0 0 +SLC35E2 9906 -1 0 0 0 0 0 +SSU72 29101 -1 0 0 0 0 0 +TAS1R3 83756 -1 0 0 0 0 0 +TMEM88B 643965 -1 0 0 0 0 0 +TNFRSF18 8784 -1 0 0 0 0 0 +TNFRSF4 7293 -1 0 0 0 0 0 +TTLL10 254173 -1 0 0 0 0 0 +UBE2J2 118424 -1 0 0 0 0 0 +VWA1 64856 -1 0 0 0 0 0 diff --git a/data/maf.csv b/data/maf.csv new file mode 100644 index 00000000..ab0ad230 --- /dev/null +++ b/data/maf.csv @@ -0,0 +1,17 @@ +Hugo_Symbol;Entrez_Gene_Id;Center;NCBI_Build;Chromosome;Start_position;End_position;Strand;Variant_Classification;Variant_Type;Reference_Allele;Tumor_Seq_Allele1;Tumor_Seq_Allele2;dbSNP_RS;dbSNP_Val_Status;Tumor_Sample_Barcode;Matched_Norm_Sample_Barcode;Match_Norm_Seq_Allele1;Match_Norm_Seq_Allele2;Tumor_Validation_Allele1;Tumor_Validation_Allele2;Match_Norm_Validation_Allele1;Match_Norm_Validation_Allele2;Verification_Status;Validation_Status;Mutation_Status;Sequencing_Phase;Sequence_Source;Validation_Method;Score;BAM_file;Sequencer;TranscriptID;Exon;ChromChange;AAChange;COSMIC_Codon;COSMIC_Gene;Drug_Target;MA:link.var;MA;MA:variant;MA:protein.change;MA:Gene;MA:link.MSA;MA:link.PDB;MA:FImpact;MA:FI.score;MA:Cytoband;MA:Uniprot;MA:Refseq;MA:MSA_height;MA:Func.region;MA:TumorSuppressor;MA:Oncogene;;;;;;;;;;; +A2BP1;54715;hgsc.bcm.edu;36;16;7508327;7508327;+;Missense_Mutation;SNP;G;G;A;novel;unknown;TCGA-A6-2672-01A-01W-0833-10;TCGA-A6-2672-10A-01W-0833-10;G;G;.;.;.;.;Unknown;Unknown;Somatic;Phase_I;Capture;.;;;SOLID;NM_145893;exon2;c.G265A;p.A89T;.;.;.;http://getma.org/v1/?cm=var&var=hg18;16;7508327;G;A&fts=all;1;hg18;16;7508327;G;A;A69T;A2BP1;http://getma.org/v1/?cm=msa&ty=f&p=RFOX1_HUMAN&rb=1&re=200&var=A69T;http://getma.org/v1/pdb.php?prot=RFOX1_HUMAN&from=1&to=200&var=A69T;neutral;0.74;16p13.3;RFOX1_HUMAN;NP_061193;394;;;;;; +ABCA10;10349;hgsc.bcm.edu;36;17;64659790;64659790;+;Silent;SNP;A;A;C;novel;unknown;TCGA-A6-2672-01A-01W-0833-10;TCGA-A6-2672-10A-01W-0833-10;A;A;A;C;A;A;Unknown;Valid;Somatic;Phase_I;Capture;Illumina;;;SOLID;NM_080282;exon37;c.T4386G;p.A1462A;.;.;.;http://getma.org/v1/?cm=var&var=hg18;17;64659790;A;C&fts=all;1;hg18;17;64659790;A;C;A1462A;ABCA10;;;;;17q24;ABCAA_HUMAN;NP_525021;;;;;;; +ABCA10;10349;hgsc.bcm.edu;36;17;64698988;64698988;+;Silent;SNP;A;A;G;novel;unknown;TCGA-A6-2672-01A-01W-0833-10;TCGA-A6-2672-10A-01W-0833-10;A;A;A;G;A;A;Unknown;Valid;Somatic;Phase_I;Capture;Illumina;;;SOLID;NM_080282;exon18;c.T1935C;p.P645P;.;.;.;http://getma.org/v1/?cm=var&var=hg18;17;64698988;A;G&fts=all;1;hg18;17;64698988;A;G;P645P;ABCA10;;;;;17q24;ABCAA_HUMAN;NP_525021;;;;;;; +ABCA3;21;hgsc.bcm.edu;36;16;2290121;2290121;+;Silent;SNP;C;C;A;novel;unknown;TCGA-A6-2672-01A-01W-0833-10;TCGA-A6-2672-10A-01W-0833-10;C;C;C;A;C;C;Unknown;Valid;Somatic;Phase_I;Capture;Illumina;;;SOLID;NM_001089;exon13;c.G1497T;p.A499A;.;.;.;http://getma.org/v1/?cm=var&var=hg18;16;2290121;C;A&fts=all;1;hg18;16;2290121;C;A;A499A;ABCA3;;;;;16p13.3;ABCA3_HUMAN;NP_001080;;;;;;; +ABCA4;24;hgsc.bcm.edu;36;1;94263159;94263159;+;Missense_Mutation;SNP;G;G;T;novel;unknown;TCGA-A6-2672-01A-01W-0833-10;TCGA-A6-2672-10A-01W-0833-10;G;G;G;T;G;G;Unknown;Valid;Somatic;Phase_I;Capture;Illumina;;;SOLID;NM_000350;exon31;c.C4573A;p.L1525M;.;.;.;http://getma.org/v1/?cm=var&var=hg18;1;94263159;G;T&fts=all;1;hg18;1;94263159;G;T;L1525M;ABCA4;http://getma.org/v1/?cm=msa&ty=f&p=ABCA4_HUMAN&rb=1487&re=1595&var=L1525M;;medium;2375;1p22.1-p21;ABCA4_HUMAN;NP_000341;37;;;;;; +ACSM5;54988;hgsc.bcm.edu;36;16;20337086;20337086;+;Missense_Mutation;SNP;C;C;T;novel;unknown;TCGA-A6-2674-01A-02W-0831-10;TCGA-A6-2674-10A-01W-0831-10;C;C;C;T;C;C;Unknown;Valid;Somatic;Phase_I;Capture;454_PCR_WGA;;;SOLID;NM_017888;exon3;c.C409T;p.R137W;.;.;.;http://getma.org/v1/?cm=var&var=hg18;16;20337086;C;T&fts=all;1;hg18;16;20337086;C;T;R137W;ACSM5;http://getma.org/v1/?cm=msa&ty=f&p=ACSM5_HUMAN&rb=91&re=502&var=R137W;http://getma.org/v1/pdb.php?prot=ACSM5_HUMAN&from=91&to=502&var=R137W;medium;3205;16p12.3;ACSM5_HUMAN;NP_060358;396;;;;;; +ADCY2;108;hgsc.bcm.edu;36;5;7819897;7819897;+;Missense_Mutation;SNP;C;C;T;novel;unknown;TCGA-A6-2674-01A-02W-0831-10;TCGA-A6-2674-10A-01W-0831-10;C;C;C;T;C;C;Unknown;Valid;Somatic;Phase_I;Capture;454_PCR_WGA;;;SOLID;NM_020546;exon17;c.C2192T;p.A731V;.;.;.;http://getma.org/v1/?cm=var&var=hg18;5;7819897;C;T&fts=all;1;hg18;5;7819897;C;T;A731V;ADCY2;http://getma.org/v1/?cm=msa&ty=f&p=ADCY2_HUMAN&rb=601&re=800&var=A731V;;low;1845;5p15.3;ADCY2_HUMAN;NP_065433;68;;;;;; +APC;324;hgsc.bcm.edu;36;5;112192485;112192485;+;Nonsense_Mutation;SNP;C;C;T;novel;unknown;TCGA-A6-2674-01A-02W-0831-10;TCGA-A6-2674-10A-01W-0831-10;C;C;.;.;.;.;Unknown;Unknown;Somatic;Phase_I;Capture;.;;;SOLID;NM_001127511;exon12;c.C1606T;p.R536X; APC;ovary;NS;Substitution - Nonsense;0;.;.;http://getma.org/v1/?cm=var&var=hg18;5;112192485;C;T&fts=all;1;hg18;5;112192485;C;T;R554;APC;;;;;5q21-q22;APC_HUMAN;NP_000029;;;1 +ASB16;92591;hgsc.bcm.edu;36;17;39610531;39610531;+;Missense_Mutation;SNP;G;G;A;novel;unknown;TCGA-A6-2674-01A-02W-0831-10;TCGA-A6-2674-10A-01W-0831-10;G;G;G;A;G;G;Unknown;Valid;Somatic;Phase_I;Capture;454_PCR_WGA;;;SOLID;NM_080863;exon4;c.G1091A;p.R364Q;.;.;.;http://getma.org/v1/?cm=var&var=hg18;17;39610531;G;A&fts=all;1;hg18;17;39610531;G;A;R364Q;ASB16;http://getma.org/v1/?cm=msa&ty=f&p=ASB16_HUMAN&rb=315&re=407&var=R364Q;;medium;2.19;17q21.31;ASB16_HUMAN;NP_543139;30;;;;;; +A2BP1;54715;hgsc.bcm.edu;36;16;7700697;7700697;+;Silent;SNP;A;A;G;novel;unknown;TCGA-A6-2676-01A-01W-0833-10;TCGA-A6-2676-10A-01W-0833-10;A;A;A;G;A;A;Unknown;Valid;Somatic;Phase_I;Capture;Illumina;;;SOLID;NM_145891;exon13;c.A1206G;p.S402S;.;.;.;http://getma.org/v1/?cm=var&var=hg18;16;7700697;A;G&fts=all;1;hg18;16;7700697;A;G;S381S;A2BP1;;;;;16p13.3;RFOX1_HUMAN;NP_061193;;;;;;; +ABAT;18;hgsc.bcm.edu;36;16;8769566;8769566;+;Missense_Mutation;SNP;G;G;A;novel;unknown;TCGA-A6-2676-01A-01W-0833-10;TCGA-A6-2676-10A-01W-0833-10;G;G;G;A;G;G;Unknown;Valid;Somatic;Phase_I;Capture;Illumina;;;SOLID;NM_000663;exon10;c.G619A;p.D207N;.;.;.;http://getma.org/v1/?cm=var&var=hg18;16;8769566;G;A&fts=all;1;hg18;16;8769566;G;A;D207N;ABAT;http://getma.org/v1/?cm=msa&ty=f&p=GABT_HUMAN&rb=76&re=446&var=D207N;http://getma.org/v1/pdb.php?prot=GABT_HUMAN&from=76&to=446&var=D207N;low;1.36;16p13.2;GABT_HUMAN;NP_065737;423;;;;;; +ABCA10;10349;hgsc.bcm.edu;36;17;64722504;64722505;+;Frame_Shift_Ins;INS;-;-;A;novel;unknown;TCGA-A6-2676-01A-01W-0833-10;TCGA-A6-2676-10A-01W-0833-10;-;-;-;A;-;-;Unknown;Valid;Somatic;Phase_I;Capture;Illumina;;;SOLID;NM_080282;exon10;c.942_943insT;p.F314fs;.;.;.;;;;;;;;;;;;;;;;;;;;;;;;;;; +ABCC3;8714;hgsc.bcm.edu;36;17;46108744;46108744;+;Silent;SNP;C;C;T;novel;unknown;TCGA-A6-2676-01A-01W-0833-10;TCGA-A6-2676-10A-01W-0833-10;C;C;C;T;C;C;Unknown;Valid;Somatic;Phase_I;Capture;Illumina;;;SOLID;NM_003786;exon23;c.C3174T;p.S1058S;.;.;.;http://getma.org/v1/?cm=var&var=hg18;17;46108744;C;T&fts=all;1;hg18;17;46108744;C;T;S1058S;ABCC3;;;;;17q22;MRP3_HUMAN;NP_003777;;;;;;; +ABHD12;26090;hgsc.bcm.edu;36;20;25248888;25248888;+;Silent;SNP;G;G;T;novel;unknown;TCGA-A6-2676-01A-01W-0833-10;TCGA-A6-2676-10A-01W-0833-10;G;G;G;T;G;G;Unknown;Valid;Somatic;Phase_I;Capture;Illumina;;;SOLID;NM_001042472;exon4;c.C489A;p.A163A;.;.;.;http://getma.org/v1/?cm=var&var=hg18;20;25248888;G;T&fts=all;1;hg18;20;25248888;G;T;A163A;ABHD12;;;;;20p11.21;ABD12_HUMAN;NP_056415;;;;;;; +ABR;29;hgsc.bcm.edu;36;17;908746;908746;+;Missense_Mutation;SNP;G;G;A;novel;unknown;TCGA-A6-2676-01A-01W-0833-10;TCGA-A6-2676-10A-01W-0833-10;G;G;G;A;G;G;Unknown;Valid;Somatic;Phase_I;Capture;Illumina;;;SOLID;NM_021962;exon11;c.C1294T;p.R432W;.;.;.;http://getma.org/v1/?cm=var&var=hg18;17;908746;G;A&fts=all;1;hg18;17;908746;G;A;R432W;ABR;http://getma.org/v1/?cm=msa&ty=f&p=ABR_HUMAN&rb=302&re=459&var=R432W;;low;1.73;17p13.3;ABR_HUMAN;NP_068781;44;;;;;; +ACAD11;84129;hgsc.bcm.edu;36;3;133820174;133820174;+;Missense_Mutation;SNP;C;C;T;novel;unknown;TCGA-A6-2676-01A-01W-0833-10;TCGA-A6-2676-10A-01W-0833-10;C;C;C;T;C;C;Unknown;Valid;Somatic;Phase_I;Capture;Illumina;;;SOLID;NM_032169;exon11;c.G1408A;p.A470T;.;.;.;http://getma.org/v1/?cm=var&var=hg18;3;133820174;C;T&fts=all;1;hg18;3;133820174;C;T;A470T;ACAD11;http://getma.org/v1/?cm=msa&ty=f&p=ACD11_HUMAN&rb=376&re=499&var=A470T;http://getma.org/v1/pdb.php?prot=ACD11_HUMAN&from=376&to=499&var=A470T;high;4315;3q22.1;ACD11_HUMAN;NP_115545;523;;;;;; diff --git a/data/muts.RData b/data/muts.RData new file mode 100644 index 00000000..92d6b433 Binary files /dev/null and b/data/muts.RData differ diff --git a/data/ov.cgh.rda b/data/ov.cgh.rda deleted file mode 100644 index 7dd46d23..00000000 Binary files a/data/ov.cgh.rda and /dev/null differ diff --git a/data/ov.cgh.txt b/data/ov.cgh.txt deleted file mode 100644 index 7d007ac0..00000000 --- a/data/ov.cgh.txt +++ /dev/null @@ -1,87 +0,0 @@ -0 0 0 0 0 1 0 -0 0 0 1 0 1 0 -1 1 1 1 0 0 0 -1 0 0 0 0 0 0 -0 0 0 0 0 0 0 -1 1 1 1 1 0 0 -1 1 1 1 1 0 1 -0 1 1 0 0 0 0 -0 1 0 0 0 0 1 -1 0 1 1 1 1 1 -0 0 0 0 0 0 0 -1 1 1 1 1 0 1 -1 1 1 1 1 1 1 -1 1 1 0 0 1 1 -1 0 0 0 0 0 0 -0 0 0 0 0 0 0 -1 1 0 0 0 1 1 -1 1 0 1 0 1 0 -1 0 1 1 0 1 0 -1 0 0 0 1 1 0 -1 0 1 1 1 1 1 -1 1 0 0 1 1 1 -1 1 1 1 1 1 0 -1 0 1 0 1 0 1 -0 0 0 0 0 0 0 -0 0 1 0 0 0 0 -1 1 1 1 1 0 1 -1 1 0 1 1 0 1 -1 1 1 1 1 0 1 -0 1 1 0 1 0 0 -1 1 1 1 1 0 1 -1 1 0 0 1 1 1 -1 0 1 1 1 0 1 -1 1 1 1 1 0 0 -1 1 1 0 1 1 1 -1 0 0 1 1 0 1 -1 0 0 1 1 1 0 -1 1 1 0 1 1 1 -1 1 1 1 1 0 1 -1 0 0 0 0 0 0 -1 1 0 1 0 1 0 -1 1 1 1 1 1 0 -0 0 1 1 0 1 1 -1 1 1 1 0 0 0 -1 1 1 1 1 0 1 -0 0 0 1 0 1 0 -0 0 0 0 0 0 0 -1 0 1 1 1 0 0 -0 0 0 0 0 1 0 -1 1 0 0 0 0 0 -1 1 1 1 0 1 0 -1 1 0 0 0 1 0 -1 1 0 0 0 0 0 -1 1 1 0 1 1 0 -1 0 0 0 0 0 0 -1 1 1 0 1 1 0 -1 1 0 1 1 0 1 -0 0 1 1 0 1 1 -1 0 0 0 0 0 0 -0 0 0 0 0 0 1 -1 1 0 0 0 0 0 -1 1 0 0 0 1 0 -0 0 1 1 1 1 1 -1 0 1 1 1 0 1 -0 0 1 0 0 0 1 -1 1 0 0 0 1 0 -0 1 0 0 0 1 0 -1 1 1 1 1 0 1 -1 1 1 1 1 0 0 -0 0 1 1 1 1 1 -1 0 1 1 1 1 0 -0 0 1 1 1 0 0 -1 0 0 0 0 0 0 -1 0 1 1 1 0 0 -1 1 1 0 1 0 1 -1 0 0 1 1 0 1 -0 0 0 0 0 0 0 -0 1 0 1 0 1 1 -1 1 0 0 1 1 1 -1 1 1 1 0 1 1 -0 1 1 1 0 1 1 -0 0 0 0 0 1 0 -1 1 1 1 1 0 1 -1 1 0 0 0 0 0 -1 1 1 1 0 1 0 -0 0 1 0 0 0 0 -1 1 0 0 0 0 0 diff --git a/data/stage.RData b/data/stage.RData new file mode 100644 index 00000000..46db1964 Binary files /dev/null and b/data/stage.RData differ diff --git a/data/test_dataset.RData b/data/test_dataset.RData new file mode 100644 index 00000000..7facc1a1 Binary files /dev/null and b/data/test_dataset.RData differ diff --git a/data/test_dataset_no_hypos.RData b/data/test_dataset_no_hypos.RData new file mode 100644 index 00000000..e34afd86 Binary files /dev/null and b/data/test_dataset_no_hypos.RData differ diff --git a/data/test_model.RData b/data/test_model.RData new file mode 100644 index 00000000..f905b44b Binary files /dev/null and b/data/test_model.RData differ diff --git a/data/types.rda b/data/types.rda deleted file mode 100644 index d231345b..00000000 Binary files a/data/types.rda and /dev/null differ diff --git a/data/types.txt b/data/types.txt deleted file mode 100644 index 760b147c..00000000 --- a/data/types.txt +++ /dev/null @@ -1,2 +0,0 @@ -loss , brown1 -gain , cornflowerblue diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd index 79209b43..57c34aa2 100644 --- a/inst/NEWS.Rd +++ b/inst/NEWS.Rd @@ -2,8 +2,8 @@ \title{Rgraphviz News} \encoding{UTF-8} -\section{Version 0.99.2}{ +\section{Version 2.0.0-7}{ \itemize{ - \item First release + \item Beta release } } diff --git a/inst/doc/TRONCO-manual.pdf b/inst/doc/TRONCO-manual.pdf deleted file mode 100644 index 84e6af44..00000000 Binary files a/inst/doc/TRONCO-manual.pdf and /dev/null differ diff --git a/inst/doc/events.txt b/inst/doc/events.txt deleted file mode 100644 index cabd982a..00000000 --- a/inst/doc/events.txt +++ /dev/null @@ -1,7 +0,0 @@ -8q+ , gain , 1 -3q+ , gain , 2 -5q- , loss , 3 -4q- , loss , 4 -8p- , loss , 5 -1q+ , gain , 6 -Xp- , loss , 7 diff --git a/inst/doc/vignette.R b/inst/doc/vignette.R new file mode 100644 index 00000000..5b82685f --- /dev/null +++ b/inst/doc/vignette.R @@ -0,0 +1,197 @@ +### R code from vignette source 'vignette.Rnw' + +################################################### +### code chunk number 1: style-Sweave +################################################### +BiocStyle::latex() + + +################################################### +### code chunk number 2: vignette.Rnw:99-102 +################################################### +library(TRONCO) +data(aCML) +hide.progress.bar <<- TRUE + + +################################################### +### code chunk number 3: vignette.Rnw:106-107 +################################################### +show(aCML) + + +################################################### +### code chunk number 4: vignette.Rnw:111-112 +################################################### +as.events(aCML) + + +################################################### +### code chunk number 5: vignette.Rnw:116-117 +################################################### +as.genes(aCML) + + +################################################### +### code chunk number 6: vignette.Rnw:121-122 +################################################### +as.gene(aCML, genes='SETBP1') + + +################################################### +### code chunk number 7: vignette.Rnw:127-128 +################################################### +gene.hypotheses = c('KRAS', 'NRAS', 'IDH1', 'IDH2', 'TET2', 'SF3B1', 'ASXL1') + + +################################################### +### code chunk number 8: vignette.Rnw:135-136 +################################################### +alterations = events.selection(as.alterations(aCML), filter.freq = .05) + + +################################################### +### code chunk number 9: vignette.Rnw:141-142 +################################################### +dummy = oncoprint(alterations) + + +################################################### +### code chunk number 10: vignette.Rnw:144-145 +################################################### +capture.output(oncoprint(alterations, file='onco-1.pdf'), file='NUL') + + +################################################### +### code chunk number 11: vignette.Rnw:157-159 +################################################### +hypo = events.selection(aCML, filter.in.names=c(as.genes(alterations), gene.hypotheses)) +hypo = annotate.description(hypo, 'CAPRI - Bionformatics aCML data (selected events)') + + +################################################### +### code chunk number 12: vignette.Rnw:164-165 +################################################### +dummy = oncoprint(hypo, gene.annot = list(priors= gene.hypotheses), sample.id = T) + + +################################################### +### code chunk number 13: vignette.Rnw:167-168 +################################################### +capture.output(oncoprint(hypo, gene.annot = list(priors= gene.hypotheses), sample.id = T, file='onco-2.pdf'), file='NUL') + + +################################################### +### code chunk number 14: vignette.Rnw:178-179 +################################################### +hypo = hypothesis.add(hypo, 'NRAS xor KRAS', XOR('NRAS', 'KRAS')) + + +################################################### +### code chunk number 15: vignette.Rnw:184-185 (eval = FALSE) +################################################### +## hypo = hypothesis.add(hypo, 'NRAS or KRAS', OR('NRAS', 'KRAS')) + + +################################################### +### code chunk number 16: vignette.Rnw:190-191 +################################################### +dummy = oncoprint(events.selection(hypo, filter.in.names = c('KRAS', 'NRAS'))) + + +################################################### +### code chunk number 17: vignette.Rnw:193-194 +################################################### +capture.output(oncoprint(events.selection(hypo, filter.in.names = c('KRAS', 'NRAS')), file='onco-3.pdf'), file='NUL') + + +################################################### +### code chunk number 18: vignette.Rnw:201-202 +################################################### +hypo = hypothesis.add(hypo, 'SF3B1 xor ASXL1', XOR('SF3B1', OR('ASXL1')), '*') + + +################################################### +### code chunk number 19: vignette.Rnw:204-205 (eval = FALSE) +################################################### +## hypo = hypothesis.add(hypo, 'SF3B1 or ASXL1', OR('SF3B1', OR('ASXL1')), '*') + + +################################################### +### code chunk number 20: vignette.Rnw:212-214 +################################################### +as.events(hypo, genes = 'TET2') +hypo = hypothesis.add(hypo, 'TET2 xor IDH2', XOR('TET2', 'IDH2'), '*') + + +################################################### +### code chunk number 21: vignette.Rnw:216-217 (eval = FALSE) +################################################### +## hypo = hypothesis.add(hypo, 'TET2 or IDH2', OR('TET2', 'IDH2'), '*') + + +################################################### +### code chunk number 22: vignette.Rnw:220-221 +################################################### +dummy = oncoprint(events.selection(hypo, filter.in.names = c('TET2', 'IDH2'))) + + +################################################### +### code chunk number 23: vignette.Rnw:223-224 +################################################### +capture.output(oncoprint(events.selection(hypo, filter.in.names = c('TET2', 'IDH2')), file='onco-4.pdf'), file='NUL') + + +################################################### +### code chunk number 24: vignette.Rnw:232-233 +################################################### +hypo = hypothesis.add.homologous(hypo) + + +################################################### +### code chunk number 25: vignette.Rnw:237-238 +################################################### +dummy = oncoprint(hypo, gene.annot = list(priors= gene.hypotheses), sample.id = T) + + +################################################### +### code chunk number 26: vignette.Rnw:240-241 +################################################### +capture.output(oncoprint(hypo, gene.annot = list(priors= gene.hypotheses), sample.id = T, file='onco-5.pdf'), file='NUL') + + +################################################### +### code chunk number 27: vignette.Rnw:251-252 +################################################### +model = tronco.capri(hypo, boot.seed = 12345, regularization='bic', nboot=6) + + +################################################### +### code chunk number 28: figplot +################################################### +tronco.plot(model, + fontsize = 13, + scale.nodes = .6, + confidence = c('tp', 'pr', 'hg'), + height.logic = 0.25, + legend.cex = .5, + pathways = list(priors= gene.hypotheses)) + + +################################################### +### code chunk number 29: vignette.Rnw:274-275 +################################################### +model.boot = tronco.bootstrap(model, nboot=6) + + +################################################### +### code chunk number 30: figplotboot +################################################### +tronco.plot(model.boot, + fontsize = 13, + scale.nodes = .6, + confidence=c('npb'), + height.logic = 0.25, + legend.cex = .5) + + diff --git a/inst/doc/vignette.Rnw b/inst/doc/vignette.Rnw index a2460133..2fef2acc 100644 --- a/inst/doc/vignette.Rnw +++ b/inst/doc/vignette.Rnw @@ -1,4 +1,11 @@ \documentclass[a4paper, 9pt]{article} + +<>= +BiocStyle::latex() +@ + +% \VignetteIndexEntry{An R Package for TRanslational ONCOlogy} + \usepackage{hyperref} \usepackage{amsmath, amsthm, amssymb} \usepackage{xfrac} @@ -16,6 +23,8 @@ \usepackage{url} +\usepackage{placeins} + \usepackage{xspace} @@ -23,11 +32,6 @@ \newcommand{\TRONCO}{\textsc{tronco}} \usepackage{fullpage} -% \VignetteIndexEntry{TRONCO} -%\VignetteIndexEntry{TRONCO} -%\VignetteDepends{TRONCO} -%\VignetteKeywords{TRONCO} -%\VignettePackage{TRONCO} \begin{document} @@ -39,6 +43,7 @@ Marco Antoniotti\footnote{Dipartimento di Informatica Sistemistica e Comunicazione, Universit\'a degli Studi Milano-Bicocca Milano, Italy.} \and Giulio Caravagna$^\ast$ \and +Luca De Sano$^\ast$ \and Alex Graudenzi$^\ast$ \and Ilya Korsunsky\footnote{Courant Institute of Mathematical Sciences, New York University, New York, USA.} \and Mattia Longoni$^\ast$ \and @@ -54,31 +59,28 @@ Daniele Ramazzotti$^\ast$ \begin{center} \begin{minipage}[h]{0.75\textwidth} -\textbf{Abstract.} Genotype-level {\em cancer progression models} describe the ordering of accumulating mutations, e.g., somatic mutations / copy number variations, during cancer development. These graphical models help understand the ``causal structure'' involving events promoting cancer progression, possibly predicting complex patterns characterising genomic progression of a cancer. Reconstructed models can be used to better characterise genotype-phenotype relation, and suggest novel targets for therapy design. - -\TRONCO{} ({\sc tr}{\em anslational} {\sc onco}{\em logy}) is a \textsc{r} package aimed at collecting state-of-the-art algorithms to infer -\emph{progression models} from \emph{cross-sectional} data, i.e., data collected from independent patients which does not necessarily incorporate any evident temporal information. These algorithms require a binary input matrix where: $(i)$ each row represents a patient genome, $(ii)$ each column an event relevant to the progression (a priori selected) and a $0/1$ value models the absence/presence of a certain mutation in a certain patient. - - - -The current first version of \TRONCO{} -implements the \CAPRESE{} algorithm ({\sc ca}{\em ncer} {\sc pr}{\em ogression} {\sc e}{\em xtraction} {\em with} {\sc s}{\em ingle} {\sc e}{\em dges}) to infer possible progression models arranged as \emph{trees}; -cfr. -\begin{itemize} -\item \emph{Inferring tree causal models of cancer progression with - probability raising}, L. Olde Loohuis, G. Caravagna, - A. Graudenzi, D. Ramazzotti, G. Mauri, M. Antoniotti and - B. Mishra. {PLoS One}, \emph{to appear}. -\end{itemize} -This vignette shows how to use \TRONCO{} to infer a tree model of -ovarian cancer progression from CGH data of copy number alterations (classified as gains or losses over chromosome's arms). The dataset used is -available in the SKY/M-FISH database. -The reference manual for \TRONCO{} is available in the package. -\begin{center} -\includegraphics[width=0.9\textwidth]{workflow.png} -\end{center} -\flushright -\scriptsize \em The \TRONCO{} workflow. +\textbf{Abstract.} Genotype-level {\em cancer progression models} describe the temporal ordering in which genomic alterations such as somatic mutations and copy number alterations tend to fixate and accumulate during cancer formation and progression. These graphical models can describe trends of \textit{natural selection} across a population of independent tumour samples (cross-sectional data), or reconstruct the clonal evolution in a single patient's tumour (multi-region or single-cell data). In terms of application, such models can be used to better elucidate genotype-phenotype relation, predict cancer hallmarks and outcome of personalised treatment as well as suggest novel targets for therapy design. +\\ + +\TRONCO{} ({\sc tr}{\em anslational} {\sc onco}{\em logy}) is a \textsc{R} package which collects +algorithms to infer progression models from Bernoulli 0/1 profiles of genomic +alterations across a tumor sample. Such profiles are usually visualised as a +binary input matrix where each row represents a patient's sample (e.g., the +result of a sequenced tumor biopsy), and each column an event relevant to the +progression (a certain type of somatic mutation, a focal or higher-level +chromosomal copy number alteration etc.); a 0/1 value models the absence/presence +of that alteration in the sample. In this version of TRONCO such profiles can +be readily imported by boolean matrices and MAF/GISTIC files. The package provides +various functions to editing, visualise and subset such data, as well as functions +to query the Cbio portal for cancer genomics. This version of TRONCO comes with +the parallel implementations the CAPRESE [PLoS ONE 9(12): e115570] and CAPRI +[Bioinformatics, doi:10.1093/bioinformatics/btv296] algorithms to infer possible +progression models arranged as trees, or general direct acyclic graphs. +Bootstrap functions to assess the parametric, non-prametric and statistical +confidence of every inferred model are also provided. The package comes with +some data available as well, which include the dataset of Atypical Chronic Myeloid +Leukemia samples provided by Piazza et al., Nat. Genet., 45 (2013), and examples. + \end{minipage} \end{center} @@ -91,251 +93,203 @@ The reference manual for \TRONCO{} is available in the package. installed to use the package, see \texttt{Bioconductor.org}. -\paragraph{\large 1. Types/Events definition}{\ }\\ +\paragraph{\large Event selection}{\ }\\ -First, load \TRONCO{} in your \textsc{r} console. +First, load \TRONCO{} in your \textsc{R} console and the example \textit{"dataset"}. <<>>= library(TRONCO) +data(aCML) +hide.progress.bar <<- TRUE @ -Every node in the plotted topology can be colored according to the -color table defined in \textsc{r}. You can use the command -\texttt{colors} to see the available colors, e.g., \texttt{"red"}, \texttt{"blue"} or RGB -\texttt{"\#FF9900FF"}. - -You can start defining the \emph{event types} that you are -considering, and assign them a color. - -As an example, for CGH data we define two types of events, \emph{gain} -and \emph{loss}, which we color \emph{red} and \emph{green} to represent -amplifications or deletion of a chromosome arm. For instance, we can -do this as follows: + +\paragraph{ We use \texttt{show} function to get a short summary of the aCML dataset } <<>>= -types.add("gain", "cornflowerblue") -types.add("loss", "brown1") +show(aCML) @ -If many types have to be defined it might be convenient to load all of -them at once. This is possible by using a tabular input file -(in \texttt{csv} format): -\[ -\texttt{type\_name, type\_color} \qquad\qquad \text e.g., \quad \texttt{red, gain} -\] -and issuing the command \texttt{types.load("types.txt")} -- if types -are defined in file \texttt{types.txt}. The output produced by -\TRONCO{} might show warnings due to, e.g., different types assigned -the same color. - -Once types are defined, you can define the set of \emph{events} in -the dataset (which will constitute the progression), give them a \emph{label}, a type and bind them to a -dataset column. Since in general there are much more events than types, it might be convenient to prepare an external file to load via command {\tt events.load("events.txt")}. The format expected for events is similar to the one expected for types, namely as a tabular input file in \texttt{csv} format: -\[ -\texttt{event\_name, event\_type, column\_number} \qquad\qquad \text e.g., \quad \texttt{8p+, gain, 1}\, . -\] -For the ovarian CGH dataset, such a file contains the following rows (we show the first 3 lines) -\begin{verbatim} -8p+, gain, 1 -3p+, gain, 2 -5q-, loss, 3 -...... -\end{verbatim} -which define, as events, gains in arm $p$ of chromosomes $8$ and $3$, losses on arm $q$ of chromosomes $5$, etc. Given the file \emph{events.txt} where are defined the events with the above notation, the events can be loaded from a file as follows. + +\paragraph{ These are all the events it contains } <<>>= - events.load("events.txt") +as.events(aCML) @ -Events will constitute the nodes in the progression model. If one is willing to add events in a iterative fashion the command {\tt events.add(event\_name, event\_type, column\_number)} can be used. For instance {\tt events.add("8q+", "gain", 1)}. - -At this point, \TRONCO{} executes some consistency checks to ensure that all the added events are of a declared type, and report the user potential inconsistencies. +\paragraph{ Which account for alterations in the following genes } +<<>>= +as.genes(aCML) +@ +\paragraph{ These are \texttt{SETBP1} alterations across input samples } +<<>>= +as.gene(aCML, genes='SETBP1') +@ +\paragraph{ These are the genes for which we found a literature supporting the patterns +that we include below. References are in the main \textit{CAPRI} paper. } +<<>>= +gene.hypotheses = c('KRAS', 'NRAS', 'IDH1', 'IDH2', 'TET2', 'SF3B1', 'ASXL1') +@ -\paragraph{\large 2. Data loading \& Progression inference}{\ }\\ +\paragraph{ Regardless the distinct types of mutations that we included, we want to +select only genes altered in $5\%$ of the cases. Thus we first transform +data in \textit{"Alteration"} (collapsing all event types for the same gene), and +then we use select only those events } +<<>>= +alterations = events.selection(as.alterations(aCML), filter.freq = .05) +@ -Once events are set, you can load the input dataset, which must be -stored in a text file as a binary matrix (once loaded, you can use {\tt tronco.data.view(your\_data)} to visualise loaded data as a heatmap). +\paragraph{ We visualize the selected genes. This plot has no title since name annotation +is not copied by \texttt{events.selection} } <<>>= -data(ov.cgh) -data.load(ov.cgh) +dummy = oncoprint(alterations) +@ +<>= +capture.output(oncoprint(alterations, file='onco-1.pdf'), file='NUL') +@ +\incfig[ht]{onco-1}{0.9\textwidth}{Oncoprint output}{} + +\FloatBarrier + +\paragraph{\large Adding Hypotheses}{\ }\\ + +\paragraph{ Then to reconstruct the aCML model we select from \texttt{data} which have been +selected in \texttt{alteration} - via \texttt{as.genes(alterations)} or that are +part of the prior in \texttt{gene.hypotheses}. We use \texttt{filter.in.names} to +force selection of all the events involving those genes from \texttt{data} } <<>>= -str(data.values) +hypo = events.selection(aCML, filter.in.names=c(as.genes(alterations), gene.hypotheses)) +hypo = annotate.description(hypo, 'CAPRI - Bionformatics aCML data (selected events)') @ -In this case 87 samples are available and 7 events are considered (in general, the inference problem is well posed if there are more samples than events, which is the case here for ovarian). - -Further consistency checks are performed by \TRONCO{} at data-loading time; these include checking that: -\begin{itemize} -\item All the columns of the dataset are assigned a unique event; -\item There are no identical columns in the dataset. If this is the - case, the columns get merged and the events associated get merged - too (a default type is assigned in this case); -\item There are no columns in the dataset solely constituted by 0s - or 1s. If this is the case, the columns and the events associated - are deleted. -\end{itemize} -\TRONCO{} signals the user that the data presents some inconsistency, if that is the case. Once the input is loaded, \CAPRESE{} can -be executed. - -\begin{figure}[t]\center -{\includegraphics[width=0.5\textwidth]{vignette-007}} -\caption{\textbf{Ovarian cancer CGH tree reconstructed with CAPRESE.} - We show the result of reconstruction with \CAPRESE{}. - These trees are plot as explained in \S $2$ and {$3$}. The - tree is the reconstructed model without confidence information.} -\label{fig:tree} -\end{figure} +\paragraph{ We show selected data and we annotate genes in \texttt{gene.hypotheses} to identify +them. Samples names are also shown } <<>>= -topology <- tronco.caprese(data.values, lambda=0.5) +dummy = oncoprint(hypo, gene.annot = list(priors= gene.hypotheses), sample.id = T) @ -In the above example, \CAPRESE{} is executed with a \emph{shrinkage - coefficient} set to $0.5$ (the default value, if not specified), which -is the optimal value for data containing \emph{false positives} and -\emph{false negatives}. If these were absent, the optimal coefficient -should be set to an arbitrary small value, e.g. $10^{-3}$; in any -case the coefficient must be in $[0,1]$. Notice that \TRONCO{} -provides an \emph{empirical estimation} of the the rate of false -positives and negatives in the data, given the reconstructed model; -this is done via $\ell_2$ distance. - -The returned topology can be printed to screen by using the -\texttt{topology} object print method, or can be visualized by using -the \texttt{tronco.plot} function. -<>= -topology -tronco.plot(topology, title="Ovarian cancer progression with CAPRESE", legend.title="CGH events", - legend.coeff = 1.0, label.coeff = 1.2, legend = TRUE) +<>= +capture.output(oncoprint(hypo, gene.annot = list(priors= gene.hypotheses), sample.id = T, file='onco-2.pdf'), file='NUL') @ +\incfig[ht]{onco-2}{0.9\textwidth}{Oncoprint output}{} -In this case we are assigning a title to the plot, we are requiring -to display a legend (\texttt{ legend = TRUE}), and we are setting custom -size for the text in the legend (\texttt{legend.coeff = 0.7}, $70\%$ -of the default size) and in the model (\texttt{ label.coeff = 1.2}); -see Figure \ref{fig:tree}. +\FloatBarrier -\paragraph{\large 3. Confidence estimation}{\ }\\ +\paragraph{ We now add the hypotheses that are described in CAPRI's manuscript } -\begin{figure}[t]\centerline{ -\fbox{\includegraphics[width=0.33\textwidth]{vignette-008}} -\fbox{\includegraphics[width=0.33\textwidth]{vignette-009}} -\fbox{\includegraphics[width=0.33\textwidth]{vignette-010}} \\ -} +\paragraph{ Add hypotheses of hard exclusivity (XOR) for NRAS/KRAS events (Mutation). The hypothesis is tested +against all other dataset events } +<<>>= +hypo = hypothesis.add(hypo, 'NRAS xor KRAS', XOR('NRAS', 'KRAS')) +@ -\centerline{ -\fbox{\includegraphics[width=0.33\textwidth]{vignette-011}} -\fbox{\includegraphics[width=0.33\textwidth]{vignette-012}} -\fbox{\includegraphics[width=0.33\textwidth]{vignette-013}} -} -\caption{\textbf{Probabilities (input data): visualisation and comparison with model's predictions.} Top: observed - \emph{frequencies} of \emph{observed}, \emph{joint} and - \emph{conditional} distributions of events (conditionals are - restricted according to the reconstructed progression - model) as emerge from the data. Bottom: difference between observed and fitted - probabilities, according to the reconstructed progression.} -\label{fig:distrib} -\end{figure} - -\paragraph{Data and model probabilities.} Before estimating the -confidence of a reconstruction, one might print and visualise the -\emph{frequency of occurrence} for each event, the \emph{ joint - distribution} and the \emph{conditional distribution} according to -the input data (i.e., the \emph{observed} probabilities). Notice -that for the conditional distribution we condition only on the parent -of a node, as reconstructed in the returned model. Plots of these distributions are shown in Figure -\ref{fig:distrib}, and are evaluated as follows. -<>= - confidence.data.single(topology) +\paragraph{ Here we try to include also a soft exclusivity (OR) pattern but, since its \textit{"signature"} +is the same of the hard one, it will not be included. The code below is commented because it gives errors. } +<>= +hypo = hypothesis.add(hypo, 'NRAS or KRAS', OR('NRAS', 'KRAS')) @ -<>= - confidence.data.joint(topology) + +\paragraph{ For the sake to better highlight the perfect (hard) exclusivity between NRAS/KRAS +mutations one can visualize their alterations } +<<>>= +dummy = oncoprint(events.selection(hypo, filter.in.names = c('KRAS', 'NRAS'))) @ -<>= - confidence.data.conditional(topology) +<>= +capture.output(oncoprint(events.selection(hypo, filter.in.names = c('KRAS', 'NRAS')), file='onco-3.pdf'), file='NUL') @ +\incfig[ht]{onco-3}{0.9\textwidth}{Oncoprint output}{} -In a similar way, by using \texttt{ confidence.fit.single(topology)}, -\texttt{ confidence.fit.joint(topology)} or -\texttt{confidence.fit.conditional(topology)}, the analogous -probabilities can be assessed according to the model. This are not -shown in this vignette. +\FloatBarrier -The difference between observed and fit probabilities can be -visualised as follows. -<>= -confidence.single(topology) +\paragraph{ This is as above, but includes other events. Again, we can include only the hard exclusivity pattern } +<<>>= +hypo = hypothesis.add(hypo, 'SF3B1 xor ASXL1', XOR('SF3B1', OR('ASXL1')), '*') +@ +<>= +hypo = hypothesis.add(hypo, 'SF3B1 or ASXL1', OR('SF3B1', OR('ASXL1')), '*') @ -<>= -confidence.joint(topology) + +\paragraph{ We now do the same for TET2 and IDH2. In this case 3 events for TET2 are present, which are +\textit{"Ins/Del"}, \textit{"Missense point"} and \textit{"Nonsense point"}. For this reason, since we are not specifying +a subset of such events all TET2 alterations are used. Since these show a perfect hard exclusivity +trend these will be included in XOR. } +<<>>= +as.events(hypo, genes = 'TET2') +hypo = hypothesis.add(hypo, 'TET2 xor IDH2', XOR('TET2', 'IDH2'), '*') @ -<>= -confidence.conditional(topology) +<>= +hypo = hypothesis.add(hypo, 'TET2 or IDH2', OR('TET2', 'IDH2'), '*') @ +<<>>= +dummy = oncoprint(events.selection(hypo, filter.in.names = c('TET2', 'IDH2'))) +@ +<>= +capture.output(oncoprint(events.selection(hypo, filter.in.names = c('TET2', 'IDH2')), file='onco-4.pdf'), file='NUL') +@ +\incfig[ht]{onco-4}{0.9\textwidth}{Oncoprint output}{} -\paragraph{Bootstrap confidence.}{\ }\\ +\FloatBarrier -Confidence in a model can be estimated via \emph{parametric} and -\emph{non-parametric bootstrap}. In the former case, the model is -assumed to be correct and data is sampled by the model, in the latter -case resamples are taken from the input data, with repetitions. In any -case, the reconstruction confidence is the number of times that the -estimated tree or edge is inferred out of a number of -resamples. The parameters of the bootstrap procedure can be custom -set. +\paragraph{ For every gene that has more than one event associated we also add a soft exclusivity pattern +for its events } +<<>>= +hypo = hypothesis.add.homologous(hypo) +@ +\paragraph{ The dataset input to CAPRI is shown } <<>>= -set.seed(12345) -topology <- tronco.bootstrap(topology, type="non-parametric", nboot=1000) +dummy = oncoprint(hypo, gene.annot = list(priors= gene.hypotheses), sample.id = T) @ -<>= -tronco.bootstrap.show(topology) +<>= +capture.output(oncoprint(hypo, gene.annot = list(priors= gene.hypotheses), sample.id = T, file='onco-5.pdf'), file='NUL') @ +\incfig[ht]{onco-5}{0.9\textwidth}{Oncoprint output}{} + +\FloatBarrier +\paragraph{\large Model reconstruction}{\ }\\ -In this case, for instance, we are performing non-parametric bootstrap -(the default one) with $1000$ repetitions and, since no shrinkage -coefficient is specified, we are still using $0.5$. Here the estimated -error rates are used to include noise levels estimated from the -data/model. To perform parametric bootstrap is enough to use the flag -\texttt{ type="parametric"}. +\paragraph{ We execute CAPRI with its default parameter: we use both AIC/BIC regularizators, Hill-climbing +exhaustive bootstrap (100 replicates for Wilcoxon testing), p-value 0.05 and we set seed } <<>>= -set.seed(12345) -topology <- tronco.bootstrap(topology, type="parametric", nboot=1000) +model = tronco.capri(hypo, boot.seed = 12345, regularization='bic', nboot=6) @ -<>= -tronco.bootstrap.show(topology) + +\paragraph{ We can plot the reconstructed model. We set some parameters to get a fancy plot; confidence +is shown as temporal priority and probability raising (selective advantage scores) and +hypergeometric testing (goodness of input data). } +<>= +tronco.plot(model, + fontsize = 13, + scale.nodes = .6, + confidence = c('tp', 'pr', 'hg'), + height.logic = 0.25, + legend.cex = .5, + pathways = list(priors= gene.hypotheses)) @ +\incfig[ht]{vignette-figplot}{0.9\textwidth}{aCML Reconstructed model} +{Pre bootstrap.} +\FloatBarrier +\paragraph{\large Bootstrapping data}{\ }\\ -Results of bootstrapping are visualized as a table (useful for edge -confidence), and as a heatmap by using command -\texttt{tronco.bootstrap.show}. The overall model confidence is -reported, too. In Figure 3 results of bootstrap are -shown. If one is willing to visualize this confidence in the plot of -the inferred tree an input flag \texttt{confidence} can be used with -function \texttt{tronco.plot}. For instance: -<>= -tronco.plot(topology, title="Ovarian cancer progression with CAPRESE", legend.title="CGH events", - legend.coeff = 1.0, label.coeff = 1.2, legend = TRUE, confidence = TRUE) +<<>>= +model.boot = tronco.bootstrap(model, nboot=6) @ -In this case, the thicker lines reflect the most confident edges; -confidence is also reported as labels of edges, as shown in -Figure 4 -% -% -% These are visualized in Figure \ref{fig:bootstrap}. +<>= +tronco.plot(model.boot, + fontsize = 13, + scale.nodes = .6, + confidence=c('npb'), + height.logic = 0.25, + legend.cex = .5) +@ +\incfig[ht]{vignette-figplotboot}{0.9\textwidth}{aCML Reconstructed model} +{After bootstrap.} + + +\end{document} -\begin{figure}[t]\center -\fbox{\includegraphics[width=0.45\textwidth]{vignette-015}} -\fbox{\includegraphics[width=0.45\textwidth]{vignette-017}} -\caption{\textbf{Bootstrap for edge confidence.} Non-parametric and parametric confidence in each reconstructed edge as assessed via bootstrapping.} -\label{fig:bootstrap} -\end{figure} -\begin{figure}[t]\center -\fbox{\includegraphics[width=0.45\textwidth]{vignette-018}} -\caption{\textbf{Bootstrap information included in the model.} You can include the result of edge confidence estimation via bootstrap by using flag {\tt confidence}. In this case the thickness of each edge is proportional to its estimated confidence.} -\label{fig:bootstrap} -\end{figure} +% buggone -> fix http://stackoverflow.com/questions/12481267/in-r-how-to-prevent-blank-page-in-pdf-when-using-gridbase-to-embed-subplot-insi ?? -\end{document} \ No newline at end of file diff --git a/inst/doc/vignette.pdf b/inst/doc/vignette.pdf index b31beb4f..53e88651 100644 Binary files a/inst/doc/vignette.pdf and b/inst/doc/vignette.pdf differ diff --git a/inst/doc/workflow.png b/inst/doc/workflow.png deleted file mode 100644 index 4b8514c9..00000000 Binary files a/inst/doc/workflow.png and /dev/null differ diff --git a/inst/unitTests/test_load.R b/inst/unitTests/test_load.R deleted file mode 100644 index c73b5bda..00000000 --- a/inst/unitTests/test_load.R +++ /dev/null @@ -1,55 +0,0 @@ -test_types.add <- function(){ - types.add("type", "red") - checkTrue(any(types[,"color"] == "red")) - reset() -} - -test_types.add_duplicates <- function(){ - types.add("type", "red") - types.add("type", "red") - checkTrue(nrow(types) == 1) - obs <- tryCatch(types.add("type", "red"), warning=conditionMessage) - checkIdentical("Event type type redefined, now has color: red", obs) - reset() -} - -test_types.add_diff_color <- function(){ - types.add("gain", "red") - types.add("gain", "blue") - checkTrue(types[which(types[,"type"] == "gain"), "color"] == "blue") - reset() -} - -test_events.add_duplicates <- function(){ - types.add("gain", "blue") - events.add("a", "gain", 1) - events.add("a", "gain", 1) - checkTrue(nrow(events) == 1) - reset() -} - -test_events.add_same_col <- function(){ - types.add("gain", "blue") - events.add("a", "gain", 1) - events.add("b", "gain", 1) - checkTrue(nrow(events) == 1) - reset() -} - -test_events.add_same_key <- function(){ - types.add("gain", "blue") - events.add("a", "gain", 1) - events.add("a", "gain", 2) - checkTrue(nrow(events) == 1) - reset() -} - -test_events.add <- function(){ - types.add("gain", "blue") - types.add("loss", "green") - events.add("a", "gain", 1) - events.add("a", "loss", 2) - checkTrue(nrow(events) == 2) - reset() -} - diff --git a/inst/unitTests/test_no_loaded.R b/inst/unitTests/test_no_loaded.R deleted file mode 100644 index b9291ce9..00000000 --- a/inst/unitTests/test_no_loaded.R +++ /dev/null @@ -1,18 +0,0 @@ -test_no_loaded <- function(){ - - reset() - obs <- tryCatch(events.add("A", "loss", 1), error=conditionMessage) - checkIdentical("types variable not defined!", obs) - -} -test_shrinkage_value <- function(){ - - reset() - data(types) - data(events) - data(ov.cgh) - data.load(ov.cgh) - obs <- tryCatch(tronco.caprese(data.values, lambda = 2), error=conditionMessage) - checkIdentical("Lambda coefficient must be in [0:1]!", obs) - -} \ No newline at end of file diff --git a/inst/unitTests/test_topology.R b/inst/unitTests/test_topology.R deleted file mode 100644 index a5e74a90..00000000 --- a/inst/unitTests/test_topology.R +++ /dev/null @@ -1,43 +0,0 @@ -test_topology <- function(){ - - reset() - - types.add("gain", "red") - types.add("loss", "blue") - - events.add("8q+", "gain", 1) - events.add("3q+", "gain", 2) - events.add("5q-", "loss", 3) - events.add("4q-", "loss", 4) - events.add("8p-", "loss", 5) - events.add("1q+", "gain", 6) - events.add("Xp-", "loss", 7) - - data(ov.cgh) - data.load(ov.cgh) - - checkTrue(exists("data.values")) - checkTrue(ncol(data.values) == 7) - - topology <- tronco.caprese(data.values, lambda = 0.5) - - adj.matrix <- topology@adj.matrix - - # Checks if the expected edges between well known relations are set - - checkTrue(adj.matrix[1,2] == 1) - checkTrue(adj.matrix[3,4] == 1) - checkTrue(adj.matrix[1,5] == 1) - checkTrue(adj.matrix[5,7] == 1) - - - set.seed(1234) - - topology <- tronco.bootstrap(topology, type = "non-parametric", nboot = 100) - - checkEqualsNumeric(topology@confidence$overall.value, 7) - checkEqualsNumeric(topology@edge.confidence["8q+:gain","3q+:gain"], 0.96) - checkEqualsNumeric(topology@edge.confidence["5q-:loss","4q-:loss"], 0.45) - - -} \ No newline at end of file diff --git a/man/AND.Rd b/man/AND.Rd new file mode 100644 index 00000000..9f43b385 --- /dev/null +++ b/man/AND.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.hypotheses.R +\name{AND} +\alias{AND} +\title{AND} +\usage{ +AND(...) +} +\arguments{ +\item{...}{Atoms of the co-occurance pattern given either as labels or as partielly lifted vectors.} +} +\value{ +Vector to be added to the lifted genotype resolving the co-occurance pattern +} +\description{ +AND hypothesis +} + diff --git a/man/OR.Rd b/man/OR.Rd new file mode 100644 index 00000000..5e7e8f0a --- /dev/null +++ b/man/OR.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.hypotheses.R +\name{OR} +\alias{OR} +\title{OR} +\usage{ +OR(...) +} +\arguments{ +\item{...}{Atoms of the soft exclusive pattern given either as labels or as partielly lifted vectors.} +} +\value{ +Vector to be added to the lifted genotype resolving the soft exclusive pattern +} +\description{ +OR hypothesis +} + diff --git a/man/TCGA.map.clinical.data.Rd b/man/TCGA.map.clinical.data.Rd new file mode 100644 index 00000000..3560b143 --- /dev/null +++ b/man/TCGA.map.clinical.data.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/external.R +\name{TCGA.map.clinical.data} +\alias{TCGA.map.clinical.data} +\title{TCGA.map.clinical.data} +\usage{ +TCGA.map.clinical.data(file, sep = "\\t", column.samples, column.map) +} +\arguments{ +\item{file}{TODO} + +\item{sep}{file delimiter} + +\item{column.samples}{TODO} + +\item{column.map}{TODO} +} +\value{ +a map +} +\description{ +TODO +} + diff --git a/man/TCGA.multiple.samples.Rd b/man/TCGA.multiple.samples.Rd new file mode 100644 index 00000000..5138a0fd --- /dev/null +++ b/man/TCGA.multiple.samples.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/external.R +\name{TCGA.multiple.samples} +\alias{TCGA.multiple.samples} +\title{TCGA.multiple.samples} +\usage{ +TCGA.multiple.samples(x) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} +} +\value{ +A list of barcodes. NA if no duplicated barcode is found +} +\description{ +Check if there are multiple sample in x, according to TCGA barcodes naming +} + diff --git a/man/TCGA.remove.multiple.samples.Rd b/man/TCGA.remove.multiple.samples.Rd new file mode 100644 index 00000000..185465be --- /dev/null +++ b/man/TCGA.remove.multiple.samples.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/external.R +\name{TCGA.remove.multiple.samples} +\alias{TCGA.remove.multiple.samples} +\title{TCGA.remove.multiple.samples} +\usage{ +TCGA.remove.multiple.samples(x) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} +} +\value{ +A TRONCO compliant dataset +} +\description{ +If there are multiple sample in x, according to TCGA barcodes naming, remove them +} + diff --git a/man/TCGA.shorten.barcodes.Rd b/man/TCGA.shorten.barcodes.Rd new file mode 100644 index 00000000..bc4915da --- /dev/null +++ b/man/TCGA.shorten.barcodes.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/external.R +\name{TCGA.shorten.barcodes} +\alias{TCGA.shorten.barcodes} +\title{TCGA.shorten.barcodes} +\usage{ +TCGA.shorten.barcodes(x) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} +} +\value{ +A TRONCO compliant dataset +} +\description{ +Keep only the first 12 character of samples barcode if there are no duplicates +} + diff --git a/man/TRONCO.Rd b/man/TRONCO.Rd new file mode 100644 index 00000000..a68bc4a6 --- /dev/null +++ b/man/TRONCO.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/tronco.R +\docType{package} +\name{TRONCO} +\alias{TRONCO} +\alias{TRONCO-package} +\title{TRONCO (TRanslational ONCOlogy) is a R package which collects + algorithms to infer progression models from Bernoulli 0/1 profiles of genomic + alterations across a tumor sample. Such profiles are usually visualised as a + binary input matrix where each row represents a patient's sample (e.g., the + result of a sequenced tumor biopsy), and each column an event relevant to the + progression (a certain type of somatic mutation, a focal or higher-level + chromosomal copy number alteration etc.); a 0/1 value models the absence/presence + of that alteration in the sample. In this version of TRONCO such profiles can + be readily imported by boolean matrices and MAF/GISTIC files. The package provides + various functions to editing, visualise and subset such data, as well as functions + to query the Cbio portal for cancer genomics. This version of TRONCO comes with + the parallel implementations the CAPRESE [PLoS ONE 9(12): e115570] and CAPRI + [Bioinformatics, doi:10.1093/bioinformatics/btv296] algorithms to infer possible + progression models arranged as trees, or general direct acyclic graphs. + Bootstrap functions to assess the parametric, non-prametric and statistical + confidence of every inferred model are also provided. The package comes with + some data available as well, which include the dataset of Atypical Chronic Myeloid + Leukemia samples provided by Piazza et al., Nat. Genet., 45 (2013), and examples.} +\description{ +TRONCO (TRanslational ONCOlogy) is a R package which collects + algorithms to infer progression models from Bernoulli 0/1 profiles of genomic + alterations across a tumor sample. Such profiles are usually visualised as a + binary input matrix where each row represents a patient's sample (e.g., the + result of a sequenced tumor biopsy), and each column an event relevant to the + progression (a certain type of somatic mutation, a focal or higher-level + chromosomal copy number alteration etc.); a 0/1 value models the absence/presence + of that alteration in the sample. In this version of TRONCO such profiles can + be readily imported by boolean matrices and MAF/GISTIC files. The package provides + various functions to editing, visualise and subset such data, as well as functions + to query the Cbio portal for cancer genomics. This version of TRONCO comes with + the parallel implementations the CAPRESE [PLoS ONE 9(12): e115570] and CAPRI + [Bioinformatics, doi:10.1093/bioinformatics/btv296] algorithms to infer possible + progression models arranged as trees, or general direct acyclic graphs. + Bootstrap functions to assess the parametric, non-prametric and statistical + confidence of every inferred model are also provided. The package comes with + some data available as well, which include the dataset of Atypical Chronic Myeloid + Leukemia samples provided by Piazza et al., Nat. Genet., 45 (2013), and examples. +} + diff --git a/man/XOR.Rd b/man/XOR.Rd new file mode 100644 index 00000000..29938b3e --- /dev/null +++ b/man/XOR.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.hypotheses.R +\name{XOR} +\alias{XOR} +\title{XOR} +\usage{ +XOR(...) +} +\arguments{ +\item{...}{Atoms of the hard exclusive pattern given either as labels or as partielly lifted vectors.} +} +\value{ +Vector to be added to the lifted genotype resolving the hard exclusive pattern +} +\description{ +XOR hypothesis +} + diff --git a/man/aCML.Rd b/man/aCML.Rd new file mode 100644 index 00000000..ee9bebc3 --- /dev/null +++ b/man/aCML.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{aCML} +\alias{aCML} +\title{Atypical chronic myeloid leukemia dataset} +\format{TRONCO compliant dataset} +\source{ +data from http://www.nature.com/ng/journal/v45/n1/full/ng.2495.html +} +\usage{ +data(aCML) +} +\description{ +This file contains a TRONCO compliant dataset +} +\author{ +Luca De Sano +} + diff --git a/man/annotate.description.Rd b/man/annotate.description.Rd new file mode 100644 index 00000000..4db4549a --- /dev/null +++ b/man/annotate.description.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/editing.functions.R +\name{annotate.description} +\alias{annotate.description} +\title{annotate.description} +\usage{ +annotate.description(x, label) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{label}{A string} +} +\value{ +A TRONCO compliant dataset. +} +\description{ +Annotate a description on the selected dataset +} +\examples{ +data(test_dataset) +annotate.description(test_dataset, 'new description') +} + diff --git a/man/annotate.stages.Rd b/man/annotate.stages.Rd new file mode 100644 index 00000000..5f661720 --- /dev/null +++ b/man/annotate.stages.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/editing.functions.R +\name{annotate.stages} +\alias{annotate.stages} +\title{annotate.stages} +\usage{ +annotate.stages(x, stages, match.TCGA.patients = FALSE) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{stages}{A list of stages. Rownames must match samples list of x} + +\item{match.TCGA.patients}{Match using TCGA notations (only first 12 characters)} +} +\value{ +A TRONCO compliant dataset. +} +\description{ +Annotate stage information on the selected dataset +} +\examples{ +data(test_dataset) +data(stage) +test_dataset = annotate.stages(test_dataset, stage) +as.stages(test_dataset) +} + diff --git a/man/as.adj.matrix.Rd b/man/as.adj.matrix.Rd new file mode 100644 index 00000000..ceef1baa --- /dev/null +++ b/man/as.adj.matrix.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.adj.matrix} +\alias{as.adj.matrix} +\title{as.adj.matrix} +\usage{ +as.adj.matrix(x, events = as.events(x), models = names(x$model), + type = "fit") +} +\arguments{ +\item{x}{A TRONCO model.} + +\item{events}{A subset of events as of \code{as.events(x)}, all by default.} + +\item{models}{A subset of reconstructed models, all by default.} + +\item{type}{Either the prima facie ('pf') or the post-regularization ('fit') matrix, 'fit' by default.} +} +\value{ +The adjacency matrix of a TRONCO model. +} +\description{ +Extract the adjacency matrix of a TRONCO model. The matrix is indexed with colnames/rownames which +represent genotype keys - these can be resolved with function \code{keysToNames}. It is possible to +specify a subset of events to build the matrix, a subset of models if multiple reconstruction have +been performed. Also, either the prima facie matrix or the post-regularization matrix can be extracted. +} +\examples{ +data(test_model) +as.adj.matrix(test_model) +as.adj.matrix(test_model, events=as.events(test_model)[5:15,]) +as.adj.matrix(test_model, events=as.events(test_model)[5:15,], type='pf') +} + diff --git a/man/as.alterations.Rd b/man/as.alterations.Rd new file mode 100644 index 00000000..18c8287f --- /dev/null +++ b/man/as.alterations.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.alterations} +\alias{as.alterations} +\title{as.alterations} +\usage{ +as.alterations(x, new.type = "Alteration", new.color = "khaki") +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{new.type}{The types label of the new event type, 'Alteration' by default.} + +\item{new.color}{The color of the event \code{new.type}, default 'khaki'.} +} +\value{ +A TRONCO compliant dataset with alteration profiles. +} +\description{ +Return a dataset where all events for a gene are merged in a unique event, i.e., +a total of gene-level alterations diregarding the event type. Input 'x' is checked +to be a TRONCO compliant dataset - see \code{is.compliant}. +} +\examples{ +data(muts) +as.alterations(muts) +} + diff --git a/man/as.colors.Rd b/man/as.colors.Rd new file mode 100644 index 00000000..fae275d2 --- /dev/null +++ b/man/as.colors.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.colors} +\alias{as.colors} +\title{as.colors} +\usage{ +as.colors(x) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} +} +\value{ +A named vector of colors. +} +\description{ +Return the colors associated to each type of event in 'x', which should be a +TRONCO compliant dataset - see \code{is.compliant}. +} +\examples{ +data(test_dataset) +as.colors(test_dataset) +} + diff --git a/man/as.conditional.probs.Rd b/man/as.conditional.probs.Rd new file mode 100644 index 00000000..3618f576 --- /dev/null +++ b/man/as.conditional.probs.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.conditional.probs} +\alias{as.conditional.probs} +\title{as.conditional.probs} +\usage{ +as.conditional.probs(x, events = as.events(x), models = names(x$model), + type = "observed") +} +\arguments{ +\item{x}{A TRONCO model.} + +\item{events}{A subset of events as of \code{as.events(x)}, all by default.} + +\item{models}{A subset of reconstructed models, all by default.} + +\item{type}{Either observed ('observed') or fit ('fit') probabilities, 'observed' by default.} +} +\value{ +The conditional probabilities in a TRONCO model. +} +\description{ +Extract the conditional probabilities from a TRONCO model. The return matrix is indexed with rownames which +represent genotype keys - these can be resolved with function \code{keysToNames}. It is possible to +specify a subset of events to build the matrix, a subset of models if multiple reconstruction have +been performed. Also, either the observed or fit probabilities can be extracted. +} + diff --git a/man/as.confidence.Rd b/man/as.confidence.Rd new file mode 100644 index 00000000..6a976ceb --- /dev/null +++ b/man/as.confidence.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.confidence} +\alias{as.confidence} +\title{as.confidence} +\usage{ +as.confidence(x, conf) +} +\arguments{ +\item{x}{A TRONCO model.} + +\item{conf}{A vector with any of 'tp', 'pr', 'hg', 'npb', 'pb' or 'sb'.} +} +\value{ +A list of matrices with the event-to-event confidence. +} +\description{ +Return confidence information for a TRONCO model. Available information are: temporal priority (tp), +probability raising (pr), hypergeometric test (hg), parametric (pb), non parametric (npb) or +statistical (sb) bootstrap. +Confidence is available only once a model has been reconstructed with any of the algorithms implemented +in TRONCO. If more than one model has been reconstructed - for instance via multiple regularizations - +confidence information is appropriately nested. The requested confidence is specified via +vector parameter \code{conf}. +} +\examples{ +data(test_model) +as.confidence(test_model, conf='tp') +as.confidence(test_model, conf=c('tp', 'hg')) +} + diff --git a/man/as.description.Rd b/man/as.description.Rd new file mode 100644 index 00000000..5d2d579e --- /dev/null +++ b/man/as.description.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.description} +\alias{as.description} +\title{as.description} +\usage{ +as.description(x) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} +} +\value{ +The description annotating the dataset, if any. +} +\description{ +Return the description annotating the dataset, if any. Input 'x' should be +a TRONCO compliant dataset - see \code{is.compliant}. +} +\examples{ +data(test_dataset) +as.description(test_dataset) +} + diff --git a/man/as.error.rates.Rd b/man/as.error.rates.Rd new file mode 100644 index 00000000..f7b8d2b2 --- /dev/null +++ b/man/as.error.rates.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.error.rates} +\alias{as.error.rates} +\title{as.error.rates} +\usage{ +as.error.rates(x, models = names(x$model)) +} +\arguments{ +\item{x}{A TRONCO model.} + +\item{models}{A subset of reconstructed models, all by default.} +} +\value{ +The estimated rates of false positives an negatives in the data, given the model. +} +\description{ +Extract the estimated rates of false positives an negatives in the data, given the model. +A subset of models if multiple reconstruction have been performed can be extracted. +} +\examples{ +data(test_model) +as.error.rates(test_model) +} + diff --git a/man/as.events.Rd b/man/as.events.Rd new file mode 100644 index 00000000..ab884dd2 --- /dev/null +++ b/man/as.events.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.events} +\alias{as.events} +\title{as.events} +\usage{ +as.events(x, genes = NA, types = NA) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{genes}{The genes to consider, if NA all available genes are used.} + +\item{types}{The types of events to consider, if NA all available types are used.} +} +\value{ +A matrix with 2 columns (event type, gene name) for the events found. +} +\description{ +Return all events involving certain genes and of a certain type in 'x', which should be a +TRONCO compliant dataset - see \code{is.compliant}. +} +\examples{ +data(test_dataset) +as.events(test_dataset) +as.events(test_dataset, types='ins_del') +as.events(test_dataset, genes = 'TET2') +as.events(test_dataset, types='Missing') +} + diff --git a/man/as.events.in.patterns.Rd b/man/as.events.in.patterns.Rd new file mode 100644 index 00000000..6f7ebb1b --- /dev/null +++ b/man/as.events.in.patterns.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.events.in.patterns} +\alias{as.events.in.patterns} +\title{as.events.in.patterns} +\usage{ +as.events.in.patterns(x, patterns = NULL) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{patterns}{A list of patterns for which the list will be returned} +} +\value{ +A list of events present in patterns which consitute CAPRI's hypotheses +} +\description{ +Return the list of events present in selected patterns +} +\examples{ +data(test_dataset) +as.events.in.patterns(test_dataset) +as.events.in.patterns(test_dataset, patterns='XOR_EZH2') +} + diff --git a/man/as.events.in.sample.Rd b/man/as.events.in.sample.Rd new file mode 100644 index 00000000..90d1bc17 --- /dev/null +++ b/man/as.events.in.sample.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.events.in.sample} +\alias{as.events.in.sample} +\title{as.events.in.sample} +\usage{ +as.events.in.sample(x, sample) +} +\arguments{ +\item{x}{A TRONCO compliant dataset} + +\item{sample}{Vector of sample names} +} +\value{ +A list of events which are observed in the input samples list +} +\description{ +Return a list of events which are observed in the input samples list +} +\examples{ +data(test_dataset) +as.events.in.sample(test_dataset, c('patient 1', 'patient 7')) +} + diff --git a/man/as.events.test.Rd b/man/as.events.test.Rd new file mode 100644 index 00000000..4d9855ba --- /dev/null +++ b/man/as.events.test.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{as.events.test} +\alias{as.events.test} +\title{as events matrix} +\format{matrix} +\source{ +fake data +} +\usage{ +data(as.events.test) +} +\description{ +This data set list ... +} +\author{ +Luca De Sano +} + diff --git a/man/as.gene.Rd b/man/as.gene.Rd new file mode 100644 index 00000000..8bd2d0d2 --- /dev/null +++ b/man/as.gene.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.gene} +\alias{as.gene} +\title{as.gene} +\usage{ +as.gene(x, genes, types = NA) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{genes}{The genes to consider, if NA all available genes are used.} + +\item{types}{The types of events to consider, if NA all available types are used.} +} +\value{ +A matrix, subset of \code{as.genotypes(x)} with colnames substituted with events' types. +} +\description{ +Return the genotypes for a certain set of genes and type of events. Input 'x' should be a +TRONCO compliant dataset - see \code{is.compliant}. In this case column names are substituted +with events' types. +} +\examples{ +data(test_dataset) +as.gene(test_dataset, genes = c('EZH2', 'ASXL1')) +} + diff --git a/man/as.genes.Rd b/man/as.genes.Rd new file mode 100644 index 00000000..fca0d521 --- /dev/null +++ b/man/as.genes.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.genes} +\alias{as.genes} +\title{as.genes} +\usage{ +as.genes(x, types = NA) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{types}{The types of events to consider, if NA all available types are used.} +} +\value{ +A vector of gene symbols for which a certain type of event exists +} +\description{ +Return all gene symbols for which a certain type of event exists in 'x', which should be a +TRONCO compliant dataset - see \code{is.compliant}. +} +\examples{ +data(test_dataset) +as.genes(test_dataset) +} + diff --git a/man/as.genes.in.patterns.Rd b/man/as.genes.in.patterns.Rd new file mode 100644 index 00000000..b1b5ac24 --- /dev/null +++ b/man/as.genes.in.patterns.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.genes.in.patterns} +\alias{as.genes.in.patterns} +\title{as.genes.in.patterns} +\usage{ +as.genes.in.patterns(x, patterns = NULL) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{patterns}{A list of patterns for which the list will be returned} +} +\value{ +A list of genes present in patterns which consitute CAPRI's hypotheses +} +\description{ +Return the list of genes present in selected patterns +} +\examples{ +data(test_dataset) +as.genes.in.patterns(test_dataset) +as.genes.in.patterns(test_dataset, patterns='XOR_EZH2') +} + diff --git a/man/as.genotypes.Rd b/man/as.genotypes.Rd new file mode 100644 index 00000000..ef32d980 --- /dev/null +++ b/man/as.genotypes.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.genotypes} +\alias{as.genotypes} +\title{as.genotypes} +\usage{ +as.genotypes(x) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} +} +\value{ +A TRONCO genotypes matrix. +} +\description{ +Return all genotypes for input 'x', which should be a TRONCO compliant dataset +see \code{is.compliant}. +Function \code{keysToNames} can be used to translate colnames to events. +} +\examples{ +data(test_dataset) +as.genotypes(test_dataset) +} + diff --git a/man/as.genotypes.test.Rd b/man/as.genotypes.test.Rd new file mode 100644 index 00000000..1fc61b38 --- /dev/null +++ b/man/as.genotypes.test.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{as.genotypes.test} +\alias{as.genotypes.test} +\title{as genotypes matrix} +\format{matrix} +\source{ +da mettere +} +\usage{ +data(as.genotypes.test) +} +\description{ +This data set list ... +} +\author{ +Luca De Sano +} + diff --git a/man/as.hypotheses.Rd b/man/as.hypotheses.Rd new file mode 100644 index 00000000..b9905ac0 --- /dev/null +++ b/man/as.hypotheses.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.hypotheses} +\alias{as.hypotheses} +\title{as.hypotheses} +\usage{ +as.hypotheses(x, cause = NA, effect = NA) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{cause}{A list of genes to use as causes} + +\item{effect}{A list of genes to use as effects} +} +\value{ +The hypotheses in the dataset which constitute CAPRI's hypotheses. +} +\description{ +Return the hypotheses in the dataset which constitute CAPRI's hypotheses. +} +\examples{ +data(test_dataset) +as.hypotheses(test_dataset) +} + diff --git a/man/as.joint.probs.Rd b/man/as.joint.probs.Rd new file mode 100644 index 00000000..46367393 --- /dev/null +++ b/man/as.joint.probs.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.joint.probs} +\alias{as.joint.probs} +\title{as.joint.probs} +\usage{ +as.joint.probs(x, events = as.events(x), models = names(x$model), + type = "observed") +} +\arguments{ +\item{x}{A TRONCO model.} + +\item{events}{A subset of events as of \code{as.events(x)}, all by default.} + +\item{models}{A subset of reconstructed models, all by default.} + +\item{type}{Either observed ('observed') or fit ('fit') probabilities, 'observed' by default.} +} +\value{ +The joint probabilities in a TRONCO model. +} +\description{ +Extract the joint probabilities from a TRONCO model. The return matrix is indexed with rownames/colnames which +represent genotype keys - these can be resolved with function \code{keysToNames}. It is possible to +specify a subset of events to build the matrix, a subset of models if multiple reconstruction have +been performed. Also, either the observed or fit probabilities can be extracted. +} +\examples{ +data(test_model) +as.joint.probs(test_model) +as.joint.probs(test_model, events=as.events(test_model)[5:15,]) +} + diff --git a/man/as.marginal.probs.Rd b/man/as.marginal.probs.Rd new file mode 100644 index 00000000..3c55ea89 --- /dev/null +++ b/man/as.marginal.probs.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.marginal.probs} +\alias{as.marginal.probs} +\title{as.marginal.probs} +\usage{ +as.marginal.probs(x, events = as.events(x), models = names(x$model), + type = "observed") +} +\arguments{ +\item{x}{A TRONCO model.} + +\item{events}{A subset of events as of \code{as.events(x)}, all by default.} + +\item{models}{A subset of reconstructed models, all by default.} + +\item{type}{Either observed ('observed') or fit ('fit') probabilities, 'observed' by default.} +} +\value{ +The marginal probabilities in a TRONCO model. +} +\description{ +Extract the marginal probabilities from a TRONCO model. The return matrix is indexed with rownames which +represent genotype keys - these can be resolved with function \code{keysToNames}. It is possible to +specify a subset of events to build the matrix, a subset of models if multiple reconstruction have +been performed. Also, either the observed or fit probabilities can be extracted. +} +\examples{ +data(test_model) +as.marginal.probs(test_model) +as.marginal.probs(test_model, events=as.events(test_model)[5:15,]) +} + diff --git a/man/as.models.Rd b/man/as.models.Rd new file mode 100644 index 00000000..8ac81c34 --- /dev/null +++ b/man/as.models.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.models} +\alias{as.models} +\title{as.models} +\usage{ +as.models(x, models = names(x$model)) +} +\arguments{ +\item{x}{A TRONCO model.} + +\item{models}{The name of the models to extract, e.g. 'bic', 'aic', 'caprese', all by default.} +} +\value{ +The models in a reconstructed object. +} +\description{ +Extract the models from a reconstructed object. +} +\examples{ +data(test_model) +as.models(test_model) +} + diff --git a/man/as.parents.pos.Rd b/man/as.parents.pos.Rd new file mode 100644 index 00000000..0cee4109 --- /dev/null +++ b/man/as.parents.pos.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.parents.pos} +\alias{as.parents.pos} +\title{as.parents.pos} +\usage{ +as.parents.pos(x, events = as.events(x), models = names(x$model)) +} +\arguments{ +\item{x}{A TRONCO model.} + +\item{events}{A subset of events as of \code{as.events(x)}, all by default.} + +\item{models}{A subset of reconstructed models, all by default.} +} +\value{ +A list of parents for each node +} +\description{ +Get parents for each node +} + diff --git a/man/as.pathway.Rd b/man/as.pathway.Rd new file mode 100644 index 00000000..bf858655 --- /dev/null +++ b/man/as.pathway.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.pathway} +\alias{as.pathway} +\title{as.pathway} +\usage{ +as.pathway(x, pathway.genes, pathway.name, pathway.color = "yellow", + aggregate.pathway = TRUE) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{pathway.genes}{Gene (symbols) involved in the pathway.} + +\item{pathway.name}{Pathway name for visualization.} + +\item{pathway.color}{Pathway color for visualization.} + +\item{aggregate.pathway}{If TRUE drop the events for the genes in the pathway.} +} +\value{ +Extract the subset of events for genes which are part of a pathway. +} +\description{ +Given a cohort and a pathway, return the cohort with events restricted to genes +involved in the pathway. This might contain a new 'pathway' genotype with an alteration mark if +any of the involved genes are altered. +} +\examples{ +data(test_dataset) +p = as.pathway(test_dataset, c('ASXL1', 'TET2'), 'test_pathway') +} + diff --git a/man/as.patterns.Rd b/man/as.patterns.Rd new file mode 100644 index 00000000..ecd490fe --- /dev/null +++ b/man/as.patterns.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.patterns} +\alias{as.patterns} +\title{as.patterns} +\usage{ +as.patterns(x) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} +} +\value{ +The patterns in the dataset which constitute CAPRI's hypotheses. +} +\description{ +Return the patterns in the dataset which constitute CAPRI's hypotheses. +} +\examples{ +data(test_dataset) +as.patterns(test_dataset) +} + diff --git a/man/as.samples.Rd b/man/as.samples.Rd new file mode 100644 index 00000000..899b2ac2 --- /dev/null +++ b/man/as.samples.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.samples} +\alias{as.samples} +\title{as.samples} +\usage{ +as.samples(x) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} +} +\value{ +A vector of sample IDs +} +\description{ +Return all sample IDs for input 'x', which should be a TRONCO compliant dataset - see \code{is.compliant}. +} +\examples{ +data(test_dataset) +as.samples(test_dataset) +} + diff --git a/man/as.selective.advantage.relations.Rd b/man/as.selective.advantage.relations.Rd new file mode 100644 index 00000000..bde418b2 --- /dev/null +++ b/man/as.selective.advantage.relations.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.selective.advantage.relations} +\alias{as.selective.advantage.relations} +\title{as.selective.advantage.relations} +\usage{ +as.selective.advantage.relations(x, events = as.events(x), + models = names(x$model), type = "fit") +} +\arguments{ +\item{x}{A TRONCO model.} + +\item{events}{A subset of events as of \code{as.events(x)}, all by default.} + +\item{models}{A subset of reconstructed models, all by default.} + +\item{type}{Either Prima Facie ('pf') or fit ('fit') probabilities, 'fit' by default.} +} +\value{ +All the selective advantage relations in a TRONCO model +} +\description{ +Returns a dataframe with all the selective advantage relations in a +TRONCO model. Confidence is also shown - see \code{as.confidence}. It is possible to +specify a subset of events or models if multiple reconstruction have +been performed. +} +\examples{ +data(test_model) +as.selective.advantage.relations(test_model) +as.selective.advantage.relations(test_model, events=as.events(test_model)[5:15,]) +as.selective.advantage.relations(test_model, events=as.events(test_model)[5:15,], type='pf') +} + diff --git a/man/as.stages.Rd b/man/as.stages.Rd new file mode 100644 index 00000000..b29562b4 --- /dev/null +++ b/man/as.stages.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.stages} +\alias{as.stages} +\title{as.stages} +\usage{ +as.stages(x) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} +} +\value{ +A matrix with 1 column annotating stages and rownames as sample IDs. +} +\description{ +Return the association sample -> stage, if any. Input 'x' should be a +TRONCO compliant dataset - see \code{is.compliant}. +} +\examples{ +data(test_dataset) +data(stage) +test_dataset = annotate.stages(test_dataset, stage) +as.stages(test_dataset) +} + diff --git a/man/as.stages.test.Rd b/man/as.stages.test.Rd new file mode 100644 index 00000000..81ac9682 --- /dev/null +++ b/man/as.stages.test.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{as.stages.test} +\alias{as.stages.test} +\title{as stages matrix} +\format{matrix} +\source{ +fake data +} +\usage{ +data(as.stages.test) +} +\description{ +This data set list ... +} +\author{ +Luca De Sano +} + diff --git a/man/as.types.Rd b/man/as.types.Rd new file mode 100644 index 00000000..3a334872 --- /dev/null +++ b/man/as.types.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.types} +\alias{as.types} +\title{as.types} +\usage{ +as.types(x, genes = NA) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{genes}{A list of genes to consider, if NA all genes are used.} +} +\value{ +A matrix with 1 column annotating stages and rownames as sample IDs. +} +\description{ +Return the types of events for a set of genes which are in 'x', which should be a +TRONCO compliant dataset - see \code{is.compliant}. +} +\examples{ +data(test_dataset) +as.types(test_dataset) +as.types(test_dataset, genes='TET2') +} + diff --git a/man/as.types.in.patterns.Rd b/man/as.types.in.patterns.Rd new file mode 100644 index 00000000..9c4eeeb3 --- /dev/null +++ b/man/as.types.in.patterns.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{as.types.in.patterns} +\alias{as.types.in.patterns} +\title{as.types.in.patterns} +\usage{ +as.types.in.patterns(x, patterns = NULL) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{patterns}{A list of patterns for which the list will be returned} +} +\value{ +A list of types present in patterns which consitute CAPRI's hypotheses +} +\description{ +Return the list of types present in selected patterns +} +\examples{ +data(test_dataset) +as.types.in.patterns(test_dataset) +as.types.in.patterns(test_dataset, patterns='XOR_EZH2') +} + diff --git a/man/bootstrap.caprese.Rd b/man/bootstrap.caprese.Rd new file mode 100644 index 00000000..ae3bb7dd --- /dev/null +++ b/man/bootstrap.caprese.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/caprese.bootstrap.R +\name{bootstrap.caprese} +\alias{bootstrap.caprese} +\title{bootstrap.caprese} +\usage{ +bootstrap.caprese(dataset, lambda, do.estimation, silent, reconstruction, + command = "non-parametric", nboot = 100, bootstrap.statistics = list()) +} +\arguments{ +\item{dataset}{a dataset describing a progressive phenomenon} + +\item{lambda}{shrinkage parameter (value in [0,1])} + +\item{do.estimation}{should I perform the estimation of the error rates and probabilities?} + +\item{silent}{should I be verbose?} + +\item{reconstruction}{todo} + +\item{command}{type of search for the likelihood fit, either hill climbing (hc) or tabu (tabu)} + +\item{nboot}{number of bootstrap resampling to be performed} + +\item{bootstrap.statistics}{todo} +} +\value{ +bootstrap.statistics: statistics of the bootstrap +} +\description{ +perform non-parametric or parametric bootstrap to evalutate the confidence of the reconstruction +} + diff --git a/man/bootstrap.capri.Rd b/man/bootstrap.capri.Rd new file mode 100644 index 00000000..7e9427c3 --- /dev/null +++ b/man/bootstrap.capri.Rd @@ -0,0 +1,53 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.bootstrap.R +\name{bootstrap.capri} +\alias{bootstrap.capri} +\title{bootstrap.capri} +\usage{ +bootstrap.capri(dataset, hypotheses, command.capri, regularization, do.boot, + nboot.capri, pvalue, min.boot, min.stat, boot.seed, do.estimation, silent, + reconstruction, command = "non-parametric", nboot = 100, + bootstrap.statistics = list(), verbose = FALSE) +} +\arguments{ +\item{dataset}{a dataset describing a progressive phenomenon} + +\item{hypotheses}{a set of hypotheses referring to the dataset} + +\item{command.capri}{type of search, either hill climbing (hc) or tabu (tabu)} + +\item{regularization}{regularizators to be used for the likelihood fit} + +\item{do.boot}{should I perform bootstrap? Yes if TRUE, no otherwise} + +\item{nboot.capri}{integer number (greater than 0) of bootstrap sampling to be performed} + +\item{pvalue}{pvalue for the tests (value between 0 and 1)} + +\item{min.boot}{minimum number of bootstrapping to be performed} + +\item{min.stat}{should I keep bootstrapping untill I have nboot valid values?} + +\item{boot.seed}{seed to be used for the sampling} + +\item{do.estimation}{should I perform the estimation of the error rates and probabilities?} + +\item{silent}{should I be verbose?} + +\item{reconstruction}{todo} + +\item{command}{should I perform non-parametric or parametric bootstrap?} + +\item{nboot}{number of bootstrap resampling to be performed} + +\item{bootstrap.statistics}{todo} + +\item{verbose}{todo} +} +\value{ +bootstrap.statistics: statistics of the bootstrap +} +\description{ +perform non-parametric or parametric bootstrap to evalutate the confidence of the reconstruction +} + diff --git a/man/caprese.fit.Rd b/man/caprese.fit.Rd new file mode 100644 index 00000000..a08eeb7d --- /dev/null +++ b/man/caprese.fit.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/caprese.algorithm.R +\name{caprese.fit} +\alias{caprese.fit} +\title{caprese.fit} +\usage{ +caprese.fit(dataset, lambda = 0.5, do.estimation = FALSE, silent = FALSE) +} +\arguments{ +\item{dataset}{a dataset describing a progressive phenomenon} + +\item{lambda}{shrinkage parameter (value in [0,1])} + +\item{do.estimation}{should I perform the estimation of the error rates and probabilities?} + +\item{silent}{execute the algorithm in silent mode} +} +\value{ +topology: the reconstructed tree-like topology +} +\description{ +reconstruct the best tree-like topology +} + diff --git a/man/capri.fit.Rd b/man/capri.fit.Rd new file mode 100644 index 00000000..3730190a --- /dev/null +++ b/man/capri.fit.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.algorithm.R +\name{capri.fit} +\alias{capri.fit} +\title{capri.fit} +\usage{ +capri.fit(dataset, hypotheses = NA, command = "hc", + regularization = c("bic", "aic"), do.boot = TRUE, nboot = 100, + pvalue = 0.05, min.boot = 3, min.stat = TRUE, boot.seed = NULL, + do.estimation = FALSE, silent = FALSE) +} +\arguments{ +\item{dataset}{a dataset describing a progressive phenomenon} + +\item{hypotheses}{hypotheses to be considered in the reconstruction} + +\item{command}{type of search for the likelihood fit, either hill climbing (hc) or tabu (tabu)} + +\item{regularization}{regularizators to be used for the likelihood fit} + +\item{do.boot}{should I perform bootstrap? Yes if TRUE, no otherwise} + +\item{nboot}{integer number (greater than 0) of bootstrap sampling to be performed} + +\item{pvalue}{pvalue for the tests (value between 0 and 1)} + +\item{min.boot}{minimum number of bootstrapping to be performed} + +\item{min.stat}{should I keep bootstrapping untill I have nboot valid values?} + +\item{boot.seed}{seed to be used for the sampling} + +\item{do.estimation}{should I perform the estimation of the error rates and probabilities?} + +\item{silent}{should I be verbose?} +} +\value{ +topology: the reconstructed tree topology +} +\description{ +reconstruct the best dag topology running CAPRI algorithm +} + diff --git a/man/cbio.query.Rd b/man/cbio.query.Rd new file mode 100644 index 00000000..7ab69f9c --- /dev/null +++ b/man/cbio.query.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/loading.R +\name{cbio.query} +\alias{cbio.query} +\title{cbio.query} +\usage{ +cbio.query(cbio.study = NA, cbio.dataset = NA, cbio.profile = NA, genes) +} +\arguments{ +\item{cbio.study}{Cbio study ID} + +\item{cbio.dataset}{Cbio dataset ID} + +\item{cbio.profile}{Cbio genetic profile ID} + +\item{genes}{A list of < 900 genes to query} +} +\value{ +A list with two dataframe: the gentic profile required and clinical data for the Cbio study. +} +\description{ +Wrapper for the CGDS package to query the Cbio portal. This can work either automatically, if one +sets \code{cbio.study}, \code{cbio.dataset} or \code{cbio.profile}, or interactively otherwise. A +list of genes to query with less than 900 entries should be provided. This function returns a list +with two dataframe: the gentic profile required and clinical data for the Cbio study. Output is also +saved to disk as Rdata file. See also http://www.cbioportal.org. +} + diff --git a/man/change.color.Rd b/man/change.color.Rd new file mode 100644 index 00000000..2cddb4ff --- /dev/null +++ b/man/change.color.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/editing.functions.R +\name{change.color} +\alias{change.color} +\title{change.color} +\usage{ +change.color(x, type, new.color) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{type}{An event type} + +\item{new.color}{The new color (either HEX or R Color)} +} +\value{ +A TRONCO complian dataset. +} +\description{ +Change the color of an event type +} +\examples{ +data(test_dataset) +dataset = change.color(test_dataset, 'ins_del', 'red') +} + diff --git a/man/check.dataset.Rd b/man/check.dataset.Rd new file mode 100644 index 00000000..6b325caa --- /dev/null +++ b/man/check.dataset.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.algorithm.R +\name{check.dataset} +\alias{check.dataset} +\title{check.dataset} +\usage{ +check.dataset(dataset, adj.matrix, verbose) +} +\arguments{ +\item{dataset}{a dataset describing a progressive phenomenon} + +\item{adj.matrix}{adjacency matrix of the topology} + +\item{verbose}{should I print the warnings? Yes if TRUE, no otherwise} +} +\value{ +valid.dataset: a dataset valid accordingly to the probability raising +} +\description{ +check if the dataset is valid accordingly to the probability raising +} + diff --git a/man/confidence.Rd b/man/confidence.Rd deleted file mode 100644 index 82744ca2..00000000 --- a/man/confidence.Rd +++ /dev/null @@ -1,58 +0,0 @@ -% Generated by roxygen2 (4.0.2): do not edit by hand -\name{confidence} -\alias{confidence} -\alias{confidence.conditional} -\alias{confidence.data.conditional} -\alias{confidence.data.joint} -\alias{confidence.data.single} -\alias{confidence.fit.conditional} -\alias{confidence.fit.joint} -\alias{confidence.fit.single} -\alias{confidence.joint} -\alias{confidence.single} -\title{provides various kinds of confidence measures for an inferred progression model} -\usage{ -confidence.data.joint(topology) - -confidence.fit.joint(topology) - -confidence.data.single(topology) - -confidence.fit.single(topology) - -confidence.data.conditional(topology) - -confidence.fit.conditional(topology) - -confidence.single(topology) - -confidence.joint(topology) - -confidence.conditional(topology) -} -\arguments{ -\item{topology}{A topology returned by the reconstruction algorithm} -} -\description{ -A set of functions to visualise and compare the probability of each event in the progression model, as well as their joint and conditional distributions. These can be evaluated both in the data (observed probabilities) and in the reconstructed model (fitted probabilities). -} -\details{ -\code{confidence.data.joint} plot the pairwise observed joint probability of the events - -\code{confidence.fit.joint} plot the pairwise fitted joint probability of the events - -\code{confidence.data.single} plot the observed probability of each event - -\code{confidence.fit.single} plot the fitted probability of each event - -\code{confidence.data.conditional} plot the pairwise observed conditional probability of the events - -\code{confidence.fit.conditional} plot the pairwise fitted conditional probability of the events - -\code{confidence.single} plot the difference between the observed and fitted probability of each event - -\code{confidence.joint} plot the pairwise difference between the observed and fitted joint probability of the events - -\code{confidence.conditional} plot the pairwise difference between the observed and fitted conditional probability of the events -} - diff --git a/man/data.load.Rd b/man/data.load.Rd deleted file mode 100644 index fc662fda..00000000 --- a/man/data.load.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2 (4.0.2): do not edit by hand -\name{data.load} -\alias{data.load} -\title{load a dataset (binary matrix) from a file or a preloaded dataset.} -\usage{ -data.load(data.input) -} -\arguments{ -\item{data.input}{The input file path. or a dataset loaded by \code{data} function} -} -\description{ -\code{data.load} sets a global data frame 'data.values' that contains the dataset loaded from an input file. -} -\details{ -\code{data.load} loads a dataset from disk and associates all columns in the dataset to a specified event. Thus, types and events must be specified before calling this function to ensure a consistency check is performed on the input dataset (see \code{types.load}, \code{types.add}, \code{events.load}, \code{events.add} to load/add types/events). -} - diff --git a/man/decimal.to.binary.dag.Rd b/man/decimal.to.binary.dag.Rd new file mode 100644 index 00000000..5a5a3c26 --- /dev/null +++ b/man/decimal.to.binary.dag.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.bootstrap.R +\name{decimal.to.binary.dag} +\alias{decimal.to.binary.dag} +\title{decimal.to.binary.dag} +\usage{ +decimal.to.binary.dag(num.decimal, num.bits) +} +\arguments{ +\item{num.decimal}{decimal integer to be converted} + +\item{num.bits}{number of bits to be used} +} +\value{ +num.binary: binary conversion of num.decimal +} +\description{ +convert an integer decimal number to binary +} + diff --git a/man/decimal.to.binary.tree.Rd b/man/decimal.to.binary.tree.Rd new file mode 100644 index 00000000..6cedb7ab --- /dev/null +++ b/man/decimal.to.binary.tree.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/caprese.bootstrap.R +\name{decimal.to.binary.tree} +\alias{decimal.to.binary.tree} +\title{decimal.to.binary.tree} +\usage{ +decimal.to.binary.tree(num.decimal, num.bits) +} +\arguments{ +\item{num.decimal}{decimal integer to be converted} + +\item{num.bits}{number of bits to be used} +} +\value{ +num.binary: binary conversion of num.decimal +} +\description{ +convert an integer decimal number to binary +} + diff --git a/man/delete.event.Rd b/man/delete.event.Rd new file mode 100644 index 00000000..b75c5ac0 --- /dev/null +++ b/man/delete.event.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/editing.functions.R +\name{delete.event} +\alias{delete.event} +\title{delete.event} +\usage{ +delete.event(x, gene, type) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{gene}{The name of the gene to delete.} + +\item{type}{The name of the type to delete.} +} +\value{ +A TRONCO complian dataset. +} +\description{ +Delete an event from the dataset +} +\examples{ +data(test_dataset) +test_dataset = delete.event(test_dataset, 'TET2', 'ins_del') +} + diff --git a/man/delete.gene.Rd b/man/delete.gene.Rd new file mode 100644 index 00000000..5f69331d --- /dev/null +++ b/man/delete.gene.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/editing.functions.R +\name{delete.gene} +\alias{delete.gene} +\title{delete.gene} +\usage{ +delete.gene(x, gene) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{gene}{The name of the gene to delete.} +} +\value{ +A TRONCO complian dataset. +} +\description{ +Delete a gene +} +\examples{ +data(test_dataset) +test_dataset = delete.gene(test_dataset, 'TET2') +} + diff --git a/man/delete.hypothesis.Rd b/man/delete.hypothesis.Rd new file mode 100644 index 00000000..36cd889c --- /dev/null +++ b/man/delete.hypothesis.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/editing.functions.R +\name{delete.hypothesis} +\alias{delete.hypothesis} +\title{delete.hypothesis} +\usage{ +delete.hypothesis(x, event = NA, cause = NA, effect = NA) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{event}{Can be an event or pattern name} + +\item{cause}{Can be an event or pattern name} + +\item{effect}{Can be an event or pattern name} +} +\value{ +A TRONCO complian dataset. +} +\description{ +Delete an hypothesis from the dataset based on a selected event. +Check if the selected event exist in the dataset and delete his associated hypothesis +} +\examples{ +data(test_dataset) +delete.hypothesis(test_dataset, event='TET2') +delete.hypothesis(test_dataset, cause='EZH2') +delete.hypothesis(test_dataset, event='XOR_EZH2') +} + diff --git a/man/delete.model.Rd b/man/delete.model.Rd new file mode 100644 index 00000000..2cc3f753 --- /dev/null +++ b/man/delete.model.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/editing.functions.R +\name{delete.model} +\alias{delete.model} +\title{delete.model} +\usage{ +delete.model(x) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} +} +\value{ +A TRONCO complian dataset. +} +\description{ +Delete a reconstructed model from the dataset +} +\examples{ +data(test_model) +model = delete.model(test_model) +has.model(model) +} + diff --git a/man/delete.pattern.Rd b/man/delete.pattern.Rd new file mode 100644 index 00000000..330d9932 --- /dev/null +++ b/man/delete.pattern.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/editing.functions.R +\name{delete.pattern} +\alias{delete.pattern} +\title{delete.pattern} +\usage{ +delete.pattern(x, pattern) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{pattern}{A pattern name} +} +\value{ +A TRONCO complian dataset. +} +\description{ +Delete a pattern and every associated hypotheses from the dataset +} +\examples{ +data(test_dataset) +delete.pattern(test_dataset, pattern='XOR_EZH2') +} + diff --git a/man/delete.samples.Rd b/man/delete.samples.Rd new file mode 100644 index 00000000..b157fe2d --- /dev/null +++ b/man/delete.samples.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/editing.functions.R +\name{delete.samples} +\alias{delete.samples} +\title{delete.samples} +\usage{ +delete.samples(x, samples) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{samples}{An array of samples name} +} +\value{ +A TRONCO complian dataset. +} +\description{ +Delete samples from selected dataset +} +\examples{ +data(test_dataset) +dataset = delete.samples(test_dataset, c('patient 1', 'patient 4')) +} + diff --git a/man/delete.type.Rd b/man/delete.type.Rd new file mode 100644 index 00000000..57c122aa --- /dev/null +++ b/man/delete.type.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/editing.functions.R +\name{delete.type} +\alias{delete.type} +\title{delete.type} +\usage{ +delete.type(x, type) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{type}{The name of the type to delete.} +} +\value{ +A TRONCO complian dataset. +} +\description{ +Delete an event type +} +\examples{ +data(test_dataset) +test_dataset = delete.type(test_dataset, 'Pattern') +} + diff --git a/man/duplicates.Rd b/man/duplicates.Rd new file mode 100644 index 00000000..fbf58b32 --- /dev/null +++ b/man/duplicates.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{duplicates} +\alias{duplicates} +\title{duplicates} +\usage{ +duplicates(x) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} +} +\value{ +A subset of \code{as.events(x)} with duplicated events. +} +\description{ +Return the events duplicated in \code{x}, if any. Input 'x' should be +a TRONCO compliant dataset - see \code{is.compliant}. +} +\examples{ +data(test_dataset) +duplicates(test_dataset) +} + diff --git a/man/ebind.Rd b/man/ebind.Rd new file mode 100644 index 00000000..3f75c1f7 --- /dev/null +++ b/man/ebind.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/editing.functions.R +\name{ebind} +\alias{ebind} +\title{ebind} +\usage{ +ebind(...) +} +\arguments{ +\item{...}{the input datasets} +} +\value{ +A TRONCO complian dataset. +} +\description{ +Binds events from one or more datasets, which must be defined over the same set of samples. +} + diff --git a/man/enforce.numeric.Rd b/man/enforce.numeric.Rd new file mode 100644 index 00000000..2d9f9cad --- /dev/null +++ b/man/enforce.numeric.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{enforce.numeric} +\alias{enforce.numeric} +\title{enforce.numeric} +\usage{ +enforce.numeric(x) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} +} +\value{ +Convert the internal reprensentation of genotypes to numeric, if not. +} +\description{ +Convert the internal reprensentation of genotypes to numeric, if not. +} +\examples{ +data(test_dataset) +test_dataset = enforce.numeric(test_dataset) +} + diff --git a/man/enforce.string.Rd b/man/enforce.string.Rd new file mode 100644 index 00000000..0956f1df --- /dev/null +++ b/man/enforce.string.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{enforce.string} +\alias{enforce.string} +\title{enforce.string} +\usage{ +enforce.string(x) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} +} +\value{ +Convert the internal reprensentation of genotypes to character, if not. +} +\description{ +Convert the internal representation of genotypes to character, if not. +} +\examples{ +data(test_dataset) +test_dataset = enforce.string(test_dataset) +} + diff --git a/man/enumerate.all.paths.Rd b/man/enumerate.all.paths.Rd new file mode 100644 index 00000000..1e1b5263 --- /dev/null +++ b/man/enumerate.all.paths.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.estimation.R +\name{enumerate.all.paths} +\alias{enumerate.all.paths} +\title{enumerate.all.paths} +\usage{ +enumerate.all.paths(ancestor.node, child.node, parents.pos) +} +\arguments{ +\item{ancestor.node}{first node of the path} + +\item{child.node}{last node of the path} + +\item{parents.pos}{topological connections} +} +\value{ +all.paths: vector of all the paths +} +\description{ +enumerate all the paths between two nodes of a DAG +} + diff --git a/man/estimate.dag.error.rates.Rd b/man/estimate.dag.error.rates.Rd new file mode 100644 index 00000000..695b5511 --- /dev/null +++ b/man/estimate.dag.error.rates.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.estimation.R +\name{estimate.dag.error.rates} +\alias{estimate.dag.error.rates} +\title{estimate.dag.error.rates} +\usage{ +estimate.dag.error.rates(dataset, marginal.probs, joint.probs, parents.pos) +} +\arguments{ +\item{dataset}{a valid dataset} + +\item{marginal.probs}{marginal probabilities} + +\item{joint.probs}{joint probabilities} + +\item{parents.pos}{which event is the parent? 0 if none, a number otherwise} +} +\value{ +estimated.error.rates: estimated probabilities, false positive and false negative error rates +} +\description{ +estimate the error rates by "L-BFGS-B" optimization in terms of L2-error +} + diff --git a/man/estimate.dag.joint.probs.Rd b/man/estimate.dag.joint.probs.Rd new file mode 100644 index 00000000..b7ae99f5 --- /dev/null +++ b/man/estimate.dag.joint.probs.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.estimation.R +\name{estimate.dag.joint.probs} +\alias{estimate.dag.joint.probs} +\title{estimate.dag.joint.probs} +\usage{ +estimate.dag.joint.probs(first.node, second.node, parents.pos, marginal.probs, + conditional.probs) +} +\arguments{ +\item{first.node}{first node} + +\item{second.node}{second node} + +\item{parents.pos}{which event is the parent? -1 if none, a list otherwise} + +\item{marginal.probs}{marginal probabilities} + +\item{conditional.probs}{conditional probabilities} +} +\value{ +estimated.dag.joint.probs: estimated theoretical joint probability +} +\description{ +estimate the theoretical joint probability of two given nodes given the reconstructed topology +} + diff --git a/man/estimate.dag.probs.Rd b/man/estimate.dag.probs.Rd new file mode 100644 index 00000000..871d3310 --- /dev/null +++ b/man/estimate.dag.probs.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.estimation.R +\name{estimate.dag.probs} +\alias{estimate.dag.probs} +\title{estimate.dag.probs} +\usage{ +estimate.dag.probs(dataset, marginal.probs, joint.probs, parents.pos, + error.rates) +} +\arguments{ +\item{dataset}{a valid dataset} + +\item{marginal.probs}{observed marginal probabilities} + +\item{joint.probs}{observed joint probabilities} + +\item{parents.pos}{position of the parents in the list of nodes} + +\item{error.rates}{rates for the false positive and the false negative errors} +} +\value{ +estimated.probs: estimated marginal, joint and conditional probabilities +} +\description{ +estimate the marginal, joint and conditional probabilities given the reconstructed topology and the error rates +} + diff --git a/man/estimate.dag.samples.Rd b/man/estimate.dag.samples.Rd new file mode 100644 index 00000000..f1d6ec4b --- /dev/null +++ b/man/estimate.dag.samples.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.estimation.R +\name{estimate.dag.samples} +\alias{estimate.dag.samples} +\title{estimate.dag.samples} +\usage{ +estimate.dag.samples(dataset, reconstructed.topology, + estimated.marginal.probabilities, estimated.conditional.probabilities, + parents.pos, error.rates) +} +\arguments{ +\item{dataset}{a valid dataset} + +\item{reconstructed.topology}{the reconstructed topology} + +\item{estimated.marginal.probabilities}{estimated marginal probabilities of the events} + +\item{estimated.conditional.probabilities}{estimated conditional probabilities of the events} + +\item{parents.pos}{position of the parents of each node} + +\item{error.rates}{error rates for false positives and false negatives} +} +\value{ +probabilities: probability of each sample +} +\description{ +estimate the probability of observing each sample in the dataset given the reconstructed topology +} + diff --git a/man/estimate.tree.error.rates.Rd b/man/estimate.tree.error.rates.Rd new file mode 100644 index 00000000..aafe7b97 --- /dev/null +++ b/man/estimate.tree.error.rates.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/caprese.estimation.R +\name{estimate.tree.error.rates} +\alias{estimate.tree.error.rates} +\title{estimate.tree.error.rates} +\usage{ +estimate.tree.error.rates(marginal.probs, joint.probs, parents.pos) +} +\arguments{ +\item{marginal.probs}{marginal probabilities} + +\item{joint.probs}{joint probabilities} + +\item{parents.pos}{which event is the parent? 0 if none, a number otherwise} +} +\value{ +estimated.error.rates: estimated probabilities, false positive and false negative error rates +} +\description{ +estimate the error rates by "L-BFGS-B" optimization in terms of L2-error +} + diff --git a/man/estimate.tree.joint.probs.Rd b/man/estimate.tree.joint.probs.Rd new file mode 100644 index 00000000..51a1e50f --- /dev/null +++ b/man/estimate.tree.joint.probs.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/caprese.estimation.R +\name{estimate.tree.joint.probs} +\alias{estimate.tree.joint.probs} +\title{estimate.tree.joint.probs} +\usage{ +estimate.tree.joint.probs(first.node, second.node, parents.pos, marginal.probs, + conditional.probs) +} +\arguments{ +\item{first.node}{first node} + +\item{second.node}{second node} + +\item{parents.pos}{which event is the parent? -1 if none, a number otherwise} + +\item{marginal.probs}{marginal probabilities} + +\item{conditional.probs}{conditional probabilities} +} +\value{ +estimated.tree.joint.probs: estimated theoretical joint probability +} +\description{ +estimate the theoretical joint probability of two given nodes given the reconstructed topology +} + diff --git a/man/estimate.tree.probs.Rd b/man/estimate.tree.probs.Rd new file mode 100644 index 00000000..b6736c0e --- /dev/null +++ b/man/estimate.tree.probs.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/caprese.estimation.R +\name{estimate.tree.probs} +\alias{estimate.tree.probs} +\title{estimate.tree.probs} +\usage{ +estimate.tree.probs(marginal.probs, joint.probs, parents.pos, error.rates) +} +\arguments{ +\item{marginal.probs}{observed marginal probabilities} + +\item{joint.probs}{observed joint probabilities} + +\item{parents.pos}{position of the parents in the list of nodes} + +\item{error.rates}{rates for the false positive and the false negative errors} +} +\value{ +estimated.probs estimated marginal, joint and conditional probabilities +} +\description{ +estimate the marginal, joint and conditional probabilities given the reconstructed topology and the error rates +} + diff --git a/man/estimate.tree.samples.Rd b/man/estimate.tree.samples.Rd new file mode 100644 index 00000000..2986e84d --- /dev/null +++ b/man/estimate.tree.samples.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/caprese.estimation.R +\name{estimate.tree.samples} +\alias{estimate.tree.samples} +\title{estimate.tree.samples} +\usage{ +estimate.tree.samples(dataset, reconstructed.topology, + estimated.marginal.probabilities, estimated.conditional.probabilities, + error.rates) +} +\arguments{ +\item{dataset}{a valid dataset} + +\item{reconstructed.topology}{the reconstructed topology} + +\item{estimated.marginal.probabilities}{estimated marginal probabilities of the events} + +\item{estimated.conditional.probabilities}{estimated conditional probabilities of the events} + +\item{error.rates}{error rates for false positives and false negatives} +} +\value{ +probabilities: probability of each sample +} +\description{ +estimate the probability of observing each sample in the dataset given the reconstructed topology +} + diff --git a/man/events.Rd b/man/events.Rd deleted file mode 100644 index e5376b39..00000000 --- a/man/events.Rd +++ /dev/null @@ -1,11 +0,0 @@ -% Generated by roxygen2 (4.0.2): do not edit by hand -\docType{data} -\name{events} -\alias{events} -\title{Events collection for Ovarian cancer CGH data} -\format{An example with 7 events} -\description{ -This example contains a collection of events associeted to the -Ovarian cancer CGH dataset -} - diff --git a/man/events.add.Rd b/man/events.add.Rd deleted file mode 100644 index 7973cffc..00000000 --- a/man/events.add.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2 (4.0.2): do not edit by hand -\name{events.add} -\alias{events.add} -\title{add a new event (e.g., a missense point mutation for EGFR)} -\usage{ -events.add(event.name, type.name, column.number = NA) -} -\arguments{ -\item{event.name}{The event label(e.g., 'EGFR') . All event labels are strings.} - -\item{type.name}{The type name of this event (e.g., 'missense point'). Type names must refer to types loaded before adding an event, a consistency check raises an error if the type name is unknown.} - -\item{column.number}{The dataset column to which this event is associated. Column number must be an -integer positive value.} -} -\description{ -\code{events.add} sets a global data frame 'events' that contains all the events defined. Events can be added and refined incrementally, in any order. -} -\details{ -\code{events.add} allows to define one event at a time. If the event was previously defined, its definition is updated to keep track of its last definition. A consistency check is performed to ensure that the type of defined event is valid. Thus, types must be defined before events are loaded (see \code{types.add}, \code{types.load}). -} -\examples{ -types.add("gain", "red") -events.add("8q+", "gain", 1) -} - diff --git a/man/events.load.Rd b/man/events.load.Rd deleted file mode 100644 index 31dbd1be..00000000 --- a/man/events.load.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2 (4.0.2): do not edit by hand -\name{events.load} -\alias{events.load} -\title{load a set of events from file} -\usage{ -events.load(data.input) -} -\arguments{ -\item{data.input}{The input file path or a dataset to be validated.} -} -\description{ -\code{events.load} sets a global data frame 'events' that contains all event definitions found in a specified file or dataset to be validated. This is a way to automatise calls to function \code{events.add} for a bunch of events. -} -\details{ -\code{events.load} load a set of events from a given file. The input file must be structured as a CSV file, where each event is defined on a separate line in the format: eventName, typeName, columnNumber. -} -\seealso{ -\code{\link{events.add}} -} - diff --git a/man/events.selection.Rd b/man/events.selection.Rd new file mode 100644 index 00000000..1eff11ef --- /dev/null +++ b/man/events.selection.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/selection.R +\name{events.selection} +\alias{events.selection} +\title{events.selection} +\usage{ +events.selection(x, filter.freq = NA, filter.in.names = NA, + filter.out.names = NA) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{filter.freq}{[0,1] value which constriants the minimum frequence of selected events} + +\item{filter.in.names}{gene symbols which will be included} + +\item{filter.out.names}{gene symbols which will NOT be included} +} +\value{ +A TRONCO compliant dataset. +} +\description{ +select a subset of the input genotypes 'x'. Selection can be done +by frequency and gene symbols. +} +\examples{ +data(test_dataset) +dataset = events.selection(test_dataset, 0.3) +} + diff --git a/man/export.mutex.Rd b/man/export.mutex.Rd new file mode 100644 index 00000000..25322eb3 --- /dev/null +++ b/man/export.mutex.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/external.R +\name{export.mutex} +\alias{export.mutex} +\title{export,mutex} +\usage{ +export.mutex(x, filename = "tronco_to_mutex", filepath = "./", + label.mutation = "SNV", label.amplification = list("High-level Gain"), + label.deletion = list("Homozygous Loss")) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{filename}{The name of the file} + +\item{filepath}{The path where to save the file} + +\item{label.mutation}{The event type to use as mutation} + +\item{label.amplification}{The event type to use as amplification (can be a list)} + +\item{label.deletion}{The event type to use as amplification (can be a list)} +} +\value{ +A MUTEX example matrix +} +\description{ +Create an input file for MUTEX +(ref: https://code.google.com/p/mutex/ ) +} +\examples{ +data(gistic) +dataset = import.GISTIC(gistic) +export.mutex(dataset) +} + diff --git a/man/export.nbs.input.Rd b/man/export.nbs.input.Rd new file mode 100644 index 00000000..29cd3bde --- /dev/null +++ b/man/export.nbs.input.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/external.R +\name{export.nbs.input} +\alias{export.nbs.input} +\title{export.nbs.input} +\usage{ +export.nbs.input(x, map_hugo_entrez, file = "tronco_to_nbs.mat") +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{map_hugo_entrez}{Hugo_Symbol-Entrez_Gene_Id map} + +\item{file}{output file name} +} +\description{ +Create a .mat file which can be used with NBS clustering +(ref: http://chianti.ucsd.edu/~mhofree/wordpress/?page_id=26) +} + diff --git a/man/extract.MAF.HuGO.Entrez.map.Rd b/man/extract.MAF.HuGO.Entrez.map.Rd new file mode 100644 index 00000000..3b2e65dd --- /dev/null +++ b/man/extract.MAF.HuGO.Entrez.map.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/loading.R +\name{extract.MAF.HuGO.Entrez.map} +\alias{extract.MAF.HuGO.Entrez.map} +\title{extract.MAF.HuGO.Entrez.map} +\usage{ +extract.MAF.HuGO.Entrez.map(file, sep = "\\t") +} +\arguments{ +\item{file}{MAF filename} + +\item{sep}{MAF separator, default \'\\t\'} +} +\value{ +A mapHugo_Symbol -> Entrez_Gene_Id. +} +\description{ +Extract a map Hugo_Symbol -> Entrez_Gene_Id from a MAF input file. If some genes map to ID 0 +a warning is raised. +} + diff --git a/man/genes.table.plot.Rd b/man/genes.table.plot.Rd new file mode 100644 index 00000000..b75421e7 --- /dev/null +++ b/man/genes.table.plot.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/visualization.R +\name{genes.table.plot} +\alias{genes.table.plot} +\title{genes.table.plot} +\usage{ +genes.table.plot(x, name, dir = getwd()) +} +\arguments{ +\item{x}{A TRONCO compliant dataset} + +\item{name}{filename} + +\item{dir}{where to save the file} +} +\description{ +Generates stacked histogram +} + diff --git a/man/genes.table.report.Rd b/man/genes.table.report.Rd new file mode 100644 index 00000000..3a0e80b9 --- /dev/null +++ b/man/genes.table.report.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/visualization.R +\name{genes.table.report} +\alias{genes.table.report} +\title{genes.table.report} +\usage{ +genes.table.report(x, name, dir = getwd(), maxrow = 33, font = 10, + height = 11, width = 8.5, fill = "lightblue") +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{name}{filename} + +\item{dir}{working directory} + +\item{maxrow}{maximum number of row per page} + +\item{font}{document fontsize} + +\item{height}{table height} + +\item{width}{table width} + +\item{fill}{fill color} +} +\value{ +LaTEX code +} +\description{ +Generate PDF and laex tables +} + diff --git a/man/get.bootstapped.scores.Rd b/man/get.bootstapped.scores.Rd new file mode 100644 index 00000000..fb5e2d5c --- /dev/null +++ b/man/get.bootstapped.scores.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.algorithm.R +\name{get.bootstapped.scores} +\alias{get.bootstapped.scores} +\title{get.bootstapped.scores} +\usage{ +get.bootstapped.scores(dataset, nboot, adj.matrix, min.boot = 3, + min.stat = TRUE, boot.seed = NULL, silent) +} +\arguments{ +\item{dataset}{a valid dataset} + +\item{nboot}{number of bootstrap resampling to be performed} + +\item{adj.matrix}{adjacency matrix of the initially valid edges} + +\item{min.boot}{minimum number of bootstrapping to be performed} + +\item{min.stat}{should I keep bootstrapping untill I have nboot valid values?} + +\item{boot.seed}{seed to be used for the sampling} + +\item{silent}{Should I be verbose?} +} +\value{ +scores: list structure with the scores and the data generated by bootstrap +} +\description{ +compute a robust estimation of the scores using rejection sampling bootstrap +} + diff --git a/man/get.dag.scores.Rd b/man/get.dag.scores.Rd new file mode 100644 index 00000000..e5b3e300 --- /dev/null +++ b/man/get.dag.scores.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.algorithm.R +\name{get.dag.scores} +\alias{get.dag.scores} +\title{get.dag.scores} +\usage{ +get.dag.scores(dataset, adj.matrix) +} +\arguments{ +\item{dataset}{a valid dataset} + +\item{adj.matrix}{adjacency matrix of the initially valid edges} +} +\value{ +scores: observed probabilities and prima facie scores +} +\description{ +compute the observed probabilities and the prima facie scores on the dataset +} + diff --git a/man/get.prima.facie.causes.do.boot.Rd b/man/get.prima.facie.causes.do.boot.Rd new file mode 100644 index 00000000..9f0e59d7 --- /dev/null +++ b/man/get.prima.facie.causes.do.boot.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.algorithm.R +\name{get.prima.facie.causes.do.boot} +\alias{get.prima.facie.causes.do.boot} +\title{get.prima.facie.causes.do.boot} +\usage{ +get.prima.facie.causes.do.boot(adj.matrix, hypotheses, + marginal.probs.distributions, prima.facie.model.distributions, + prima.facie.null.distributions, pvalue, dataset, marginal.probs, joint.probs, + silent = FALSE) +} +\arguments{ +\item{adj.matrix}{adjacency matrix of the initially valid edges} + +\item{hypotheses}{hypotheses to be considered} + +\item{marginal.probs.distributions}{distributions of the bootstrapped marginal probabilities} + +\item{prima.facie.model.distributions}{distributions of the prima facie model} + +\item{prima.facie.null.distributions}{distributions of the prima facie null} + +\item{pvalue}{minimum pvalue for the Mann-Whitney U tests to be significant} + +\item{dataset}{a valid dataset} + +\item{marginal.probs}{observed marginal probabilities} + +\item{joint.probs}{observed joint probabilities} + +\item{silent}{Should I be verbose?} +} +\value{ +prima.facie.topology: list describing the topology of the prima facie causes +} +\description{ +select the best set of prima facie causes per node +} + diff --git a/man/get.prima.facie.causes.no.boot.Rd b/man/get.prima.facie.causes.no.boot.Rd new file mode 100644 index 00000000..8df41bd0 --- /dev/null +++ b/man/get.prima.facie.causes.no.boot.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.algorithm.R +\name{get.prima.facie.causes.no.boot} +\alias{get.prima.facie.causes.no.boot} +\title{get.prima.facie.causes.no.boot} +\usage{ +get.prima.facie.causes.no.boot(adj.matrix, hypotheses, marginal.probs, + prima.facie.model, prima.facie.null, dataset, joint.probs, silent = FALSE) +} +\arguments{ +\item{adj.matrix}{adjacency matrix of the initially valid edges} + +\item{hypotheses}{hypotheses object related to adjacency matrix} + +\item{marginal.probs}{observed marginal probabilities} + +\item{prima.facie.model}{prima facie model} + +\item{prima.facie.null}{prima facie null} + +\item{dataset}{a valid dataset} + +\item{joint.probs}{observed joint probabilities} + +\item{silent}{Should I be verbose?} +} +\value{ +prima.facie.topology: adjacency matrix of the prima facie causes +} +\description{ +select the best set of prima facie causes per node without bootstrap +} + diff --git a/man/get.prima.facie.parents.do.boot.Rd b/man/get.prima.facie.parents.do.boot.Rd new file mode 100644 index 00000000..b0627783 --- /dev/null +++ b/man/get.prima.facie.parents.do.boot.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.algorithm.R +\name{get.prima.facie.parents.do.boot} +\alias{get.prima.facie.parents.do.boot} +\title{get.prima.facie.parents.do.boot} +\usage{ +get.prima.facie.parents.do.boot(dataset, hypotheses, nboot, pvalue, adj.matrix, + min.boot, min.stat, boot.seed, silent) +} +\arguments{ +\item{dataset}{a valid dataset} + +\item{hypotheses}{hypotheses object related to dataset} + +\item{nboot}{integer number (greater than 0) of bootstrap sampling to be performed} + +\item{pvalue}{pvalue for the tests (value between 0 and 1)} + +\item{adj.matrix}{adjacency matrix of the initially valid edges} + +\item{min.boot}{minimum number of bootstrapping to be performed} + +\item{min.stat}{should I keep bootstrapping untill I have nboot valid values?} + +\item{boot.seed}{seed to be used for the sampling} + +\item{silent}{Should I be verbose?} +} +\value{ +prima.facie.parents list of the set (if any) of prima facie parents for each node +} +\description{ +select the set of the prima facie parents (with bootstrap) for each node +based on Suppes' definition of causation +} + diff --git a/man/get.prima.facie.parents.no.boot.Rd b/man/get.prima.facie.parents.no.boot.Rd new file mode 100644 index 00000000..77e37168 --- /dev/null +++ b/man/get.prima.facie.parents.no.boot.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.algorithm.R +\name{get.prima.facie.parents.no.boot} +\alias{get.prima.facie.parents.no.boot} +\title{get.prima.facie.parents.no.boot} +\usage{ +get.prima.facie.parents.no.boot(dataset, hypotheses, adj.matrix, silent) +} +\arguments{ +\item{dataset}{a valid dataset} + +\item{hypotheses}{hypotheses object associated to dataset} + +\item{adj.matrix}{adjacency matrix of the initially valid edges} + +\item{silent}{Should I be verbose?} +} +\value{ +prima.facie.parents: list of the set (if any) of prima facie parents for each node +} +\description{ +select the set of the prima facie parents (without bootstrap) for each node based on Suppes' definition of causation +} + diff --git a/man/get.tree.parents.Rd b/man/get.tree.parents.Rd new file mode 100644 index 00000000..05cb0771 --- /dev/null +++ b/man/get.tree.parents.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/caprese.algorithm.R +\name{get.tree.parents} +\alias{get.tree.parents} +\title{get.tree.parents} +\usage{ +get.tree.parents(adj.matrix, marginal.probs, joint.probs, lambda) +} +\arguments{ +\item{adj.matrix}{adjacency matrix of the valid edges} + +\item{marginal.probs}{observed marginal probabilities} + +\item{joint.probs}{observed joint probabilities} + +\item{lambda}{shrinkage parameter (value between 0 and 1)} +} +\value{ +best.parents list of the best parents +} +\description{ +select at the most one parent for each node based on the probability raising criteria +} + diff --git a/man/get.tree.scores.Rd b/man/get.tree.scores.Rd new file mode 100644 index 00000000..76f8af4d --- /dev/null +++ b/man/get.tree.scores.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/caprese.algorithm.R +\name{get.tree.scores} +\alias{get.tree.scores} +\title{get.tree.scores} +\usage{ +get.tree.scores(adj.matrix, marginal.probs, joint.probs, lambda) +} +\arguments{ +\item{adj.matrix}{adjacency matrix of the valid edges} + +\item{marginal.probs}{observed marginal probabilities} + +\item{joint.probs}{observed joint probabilities} + +\item{lambda}{shrinkage parameter (value between 0 and 1)} +} +\value{ +scores: probability raising based scores +} +\description{ +compute the probability raising based scores +} + diff --git a/man/gistic.Rd b/man/gistic.Rd new file mode 100644 index 00000000..4765ee63 --- /dev/null +++ b/man/gistic.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{gistic} +\alias{gistic} +\title{GISTIC example data} +\format{GISTIC score} +\source{ +fake data +} +\usage{ +data(gistic) +} +\description{ +This dataset contains a standard GISTIC input for TRONCO +} +\author{ +Luca De Sano +} + diff --git a/man/has.duplicates.Rd b/man/has.duplicates.Rd new file mode 100644 index 00000000..5976ba9b --- /dev/null +++ b/man/has.duplicates.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{has.duplicates} +\alias{has.duplicates} +\title{has.duplicates} +\usage{ +has.duplicates(x) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} +} +\value{ +TRUE if there are duplicated events in \code{x}. +} +\description{ +Return true if there are duplicated events in the TRONCO dataset 'x', which should be +a TRONCO compliant dataset - see \code{is.compliant}. Events are identified by a gene +name, e.g., a HuGO_Symbol, and a type label, e.g., c('SNP', 'KRAS') +} +\examples{ +data(test_dataset) +has.duplicates(test_dataset) +} + diff --git a/man/has.model.Rd b/man/has.model.Rd new file mode 100644 index 00000000..fefcb8c2 --- /dev/null +++ b/man/has.model.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{has.model} +\alias{has.model} +\title{has.model} +\usage{ +has.model(x) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} +} +\value{ +TRUE if there is a reconstructed model in \code{x}. +} +\description{ +Return true if there is a reconstructed model in the TRONCO dataset 'x', which should be +a TRONCO compliant dataset - see \code{is.compliant}. +} +\examples{ +data(test_dataset) +has.model(test_dataset) +} + diff --git a/man/has.stages.Rd b/man/has.stages.Rd new file mode 100644 index 00000000..8951eafe --- /dev/null +++ b/man/has.stages.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{has.stages} +\alias{has.stages} +\title{has stages} +\usage{ +has.stages(x) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} +} +\value{ +TRUE if the TRONCO dataset has stage annotations for samples. +} +\description{ +Return true if the TRONCO dataset 'x', which should be a TRONCO compliant dataset +- see \code{is.compliant} - has stage annotations for samples. Some sample stages +might be annotated as NA, but not all. +} +\examples{ +data(test_dataset) +has.stages(test_dataset) +data(stage) +test_dataset = annotate.stages(test_dataset, stage) +has.stages(test_dataset) +} + diff --git a/man/hypotheses.expansion.Rd b/man/hypotheses.expansion.Rd new file mode 100644 index 00000000..429bde44 --- /dev/null +++ b/man/hypotheses.expansion.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.hypotheses.R +\name{hypotheses.expansion} +\alias{hypotheses.expansion} +\title{hypotheses.expansion} +\usage{ +hypotheses.expansion(input_matrix, map = list(), hidden_and = T, + expand = T, skip.disconnected = TRUE) +} +\arguments{ +\item{input_matrix}{A TRONCO adjacency matrix} + +\item{map}{hypothesis name - hypothesis adjacency matrix map} + +\item{hidden_and}{Should I visualize hidden and?} + +\item{expand}{Should I expand the hypotheses?} + +\item{skip.disconnected}{Hide disconnected node} +} +\description{ +Internal function for hypotheses expansion +} + diff --git a/man/hypothesis.add.Rd b/man/hypothesis.add.Rd new file mode 100644 index 00000000..e5579d13 --- /dev/null +++ b/man/hypothesis.add.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.hypotheses.R +\name{hypothesis.add} +\alias{hypothesis.add} +\title{hypothesis add} +\usage{ +hypothesis.add(data, pattern.label, lifted.pattern, pattern.effect = "*", + pattern.cause = "*") +} +\arguments{ +\item{data}{A TRONCO compliant dataset.} + +\item{pattern.label}{Label of the new hypothesis.} + +\item{lifted.pattern}{Vector to be added to the lifted genotype resolving the pattern related to the new hypothesis} + +\item{pattern.effect}{Possibile effects for the pattern.} + +\item{pattern.cause}{Possibile causes for the pattern.} +} +\value{ +A TRONCO compliant object with the added hypothesis +} +\description{ +hypothesis add +} + diff --git a/man/hypothesis.add.group.Rd b/man/hypothesis.add.group.Rd new file mode 100644 index 00000000..967e4055 --- /dev/null +++ b/man/hypothesis.add.group.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.hypotheses.R +\name{hypothesis.add.group} +\alias{hypothesis.add.group} +\title{hypothesis add group} +\usage{ +hypothesis.add.group(x, FUN, group, pattern.cause = "*", + pattern.effect = "*", dim.min = 2, dim.max = length(group), + min.prob = 0) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{FUN}{Type of pattern to be added, e.g., co-occurance, soft or hard exclusivity.} + +\item{group}{Group of events to be considered.} + +\item{pattern.cause}{Possibile causes for the pattern.} + +\item{pattern.effect}{Possibile effects for the pattern.} + +\item{dim.min}{Minimum cardinality of the subgroups to be considered.} + +\item{dim.max}{Maximum cardinality of the subgroups to be considered.} + +\item{min.prob}{Minimum probability associated to each valid group.} +} +\value{ +A TRONCO compliant object with the added hypotheses +} +\description{ +Add all the hypotheses related to a group of events +} + diff --git a/man/hypothesis.add.homologous.Rd b/man/hypothesis.add.homologous.Rd new file mode 100644 index 00000000..998e529b --- /dev/null +++ b/man/hypothesis.add.homologous.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.hypotheses.R +\name{hypothesis.add.homologous} +\alias{hypothesis.add.homologous} +\title{hypothesis.add.homologous} +\usage{ +hypothesis.add.homologous(x, pattern.cause = "*", pattern.effect = "*", + genes = as.genes(x), FUN = OR) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{pattern.cause}{Possibile causes for the pattern.} + +\item{pattern.effect}{Possibile effects for the pattern.} + +\item{genes}{List of genes to be considered as possible homologous. For these genes, all the types of mutations will be considered functionally equivalent.} + +\item{FUN}{Type of pattern to be added, e.g., co-occurance, soft or hard exclusivity.} +} +\value{ +A TRONCO compliant object with the added hypotheses +} +\description{ +Add all the hypotheses related to homologou events +} + diff --git a/man/hypothesis.connections.Rd b/man/hypothesis.connections.Rd new file mode 100644 index 00000000..8b969231 --- /dev/null +++ b/man/hypothesis.connections.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.hypotheses.R +\name{hypothesis.connections} +\alias{hypothesis.connections} +\title{hypothesis.connections} +\usage{ +hypothesis.connections(adj.matrix, hypotheses.label) +} +\arguments{ +\item{adj.matrix}{adjacency matrix of the topology} + +\item{hypotheses.label}{label of the hypothesis} +} +\description{ +given the adj.matrix, return the incoming and outgoing connections for any hypothesis +} + diff --git a/man/hypothesis.evaluate.cycles.Rd b/man/hypothesis.evaluate.cycles.Rd new file mode 100644 index 00000000..59658697 --- /dev/null +++ b/man/hypothesis.evaluate.cycles.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.hypotheses.R +\name{hypothesis.evaluate.cycles} +\alias{hypothesis.evaluate.cycles} +\title{hypothesis.evaluate.cycles} +\usage{ +hypothesis.evaluate.cycles(data, adj.matrix, hypotheses.labels, weights.matrix) +} +\arguments{ +\item{data}{input genotypes and its hypotheses} + +\item{adj.matrix}{adjacency matrix of the reconstructed topology} + +\item{hypotheses.labels}{label of all the existing hypotheses} + +\item{weights.matrix}{weights of any edge in the topology} +} +\description{ +evaluate cycles involving any hypothesis +} + diff --git a/man/hypothesis.expand.connections.Rd b/man/hypothesis.expand.connections.Rd new file mode 100644 index 00000000..19ae8c5b --- /dev/null +++ b/man/hypothesis.expand.connections.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.hypotheses.R +\name{hypothesis.expand.connections} +\alias{hypothesis.expand.connections} +\title{hypothesis.expand.connections} +\usage{ +hypothesis.expand.connections(label, events, incoming, outgoing, hnames, + matomic, weights.matrix) +} +\arguments{ +\item{label}{name of the hypothesis} + +\item{events}{events in the hypothesis} + +\item{incoming}{incoming connections} + +\item{outgoing}{outgoing connections} + +\item{hnames}{todo} + +\item{matomic}{todo} + +\item{weights.matrix}{weights of any edge in the topology} +} +\description{ +expand and enumerate all the connections incoming or outgoing an hypothesis +} + diff --git a/man/import.GISTIC.Rd b/man/import.GISTIC.Rd new file mode 100644 index 00000000..25d7af7b --- /dev/null +++ b/man/import.GISTIC.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/loading.R +\name{import.GISTIC} +\alias{import.GISTIC} +\title{import.GISTIC} +\usage{ +import.GISTIC(x) +} +\arguments{ +\item{x}{Either a dataframe or a filename} +} +\value{ +A TRONCO compliant representation of the input CNAs. +} +\description{ +Transform GISTIC scores for CNAs in a TRONCO compliant object. Input can be either a matrix, with columns +for each altered gene and rows for each sample; in this case colnames/rownames mut be provided. If input +is a character an attempt to load a table from file is performed. In this case the input table format +should be constitent with TCGA data for focal CNA; there should hence be: one column for each sample, +one row for each gene, a column Hugo_Symbol with every gene name and a column Entrez_Gene_Id with every + gene\'s Entrez ID. A valid GISTIC score should be any value of: "Homozygous Loss" (-2), "Heterozygous + Loss" (-1), "Low-level Gain" (+1), "High-level Gain" (+2). +} +\examples{ +data(gistic) +gistic = import.GISTIC(gistic) +gistic = annotate.description(gistic, 'Example GISTIC') +oncoprint(gistic) +} + diff --git a/man/import.MAF.Rd b/man/import.MAF.Rd new file mode 100644 index 00000000..d5fe5bad --- /dev/null +++ b/man/import.MAF.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/loading.R +\name{import.MAF} +\alias{import.MAF} +\title{import.MAF} +\usage{ +import.MAF(file, sep = "\\t", is.TCGA = TRUE) +} +\arguments{ +\item{file}{MAF filename} + +\item{sep}{MAF separator, default \'\\t\'} + +\item{is.TCGA}{TRUE if this MAF is from TCGA; thus its sample codenames can be interpreted} +} +\value{ +A TRONCO compliant representation of the input MAF +} +\description{ +Import mutation profiles from a Manual Annotation Format (MAF) file. All mutations are aggregated as a +unique event type labeled "Mutation" and assigned a color according to the default of function +\code{import.genotypes}. If this is a TCGA MAF file check for multiple samples per patient is performed +and a warning is raised if these occurr. +} +\examples{ +data(maf) +mutations = import.MAF(maf) +mutations = annotate.description(mutations, 'Example MAF') +mutations = TCGA.shorten.barcodes(mutations) +oncoprint(mutations) +} + diff --git a/man/import.genotypes.Rd b/man/import.genotypes.Rd new file mode 100644 index 00000000..e36f164f --- /dev/null +++ b/man/import.genotypes.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/loading.R +\name{import.genotypes} +\alias{import.genotypes} +\title{import.genotypes} +\usage{ +import.genotypes(geno, event.type = "variant", color = "Darkgreen") +} +\arguments{ +\item{geno}{Either a dataframe or a filename} + +\item{event.type}{Any 1 in "geno" will be interpreted as a an observed alteration labeled with type "event.type"} + +\item{color}{This is the color used for visualization of events labeled as of "event.type"} +} +\value{ +A TRONCO compliant dataset +} +\description{ +Import a matrix of 0/1 alterations as a TRONCO compliant dataset. Input "geno" can be either a dataframe or +a file name. In any case the dataframe or the table stored in the file must have a column for each altered +gene and a rows for each sample. Colnames will be used to determine gene names, if data is loaded from +file the first column will be assigned as rownames. +} + diff --git a/man/import.mutex.groups.Rd b/man/import.mutex.groups.Rd new file mode 100644 index 00000000..dbca1c9f --- /dev/null +++ b/man/import.mutex.groups.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/external.R +\name{import.mutex.groups} +\alias{import.mutex.groups} +\title{import.mutex.groups} +\usage{ +import.mutex.groups(file, fdr = 0.2, display = TRUE) +} +\arguments{ +\item{file}{Mutex results ("ranked-groups.txt" file)} + +\item{fdr}{cutoff for fdr} + +\item{display}{print summary table of extracted groups} +} +\description{ +Create a list of unique Mutex groups for a given fdr cutoff +current Mutex version is Jan 8, 2015 +(ref: https://code.google.com/p/mutex/ ) +} + diff --git a/man/intersect.datasets.Rd b/man/intersect.datasets.Rd new file mode 100644 index 00000000..6f51abb6 --- /dev/null +++ b/man/intersect.datasets.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/editing.functions.R +\name{intersect.datasets} +\alias{intersect.datasets} +\title{intersect.datasets} +\usage{ +intersect.datasets(x, y, intersect.genomes = TRUE) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{y}{A TRONCO compliant dataset.} + +\item{intersect.genomes}{If False -> just samples} +} +\value{ +A TRONCO complian dataset. +} +\description{ +Intersect samples and events of two dataset +} +\examples{ +data(test_dataset) +} + diff --git a/man/is.compliant.Rd b/man/is.compliant.Rd new file mode 100644 index 00000000..e089cb19 --- /dev/null +++ b/man/is.compliant.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/correctness.R +\name{is.compliant} +\alias{is.compliant} +\title{is.compliant} +\usage{ +is.compliant(x, err.fun = "[ERR]", stage = !(all(is.null(x$stages)) || + all(is.na(x$stages)))) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{err.fun}{string which identifies the function which called is.compliant} + +\item{stage}{boolean flag to check x$stage datagframe} +} +\value{ +on error stops the computation +} +\description{ +Check if 'x' is compliant with TRONCO's input: that is if it has dataframes +x$genotypes, x$annotations, x$types and x$stage (optional) +} + diff --git a/man/is.events.list.Rd b/man/is.events.list.Rd new file mode 100644 index 00000000..0704346c --- /dev/null +++ b/man/is.events.list.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/correctness.R +\name{is.events.list} +\alias{is.events.list} +\title{is.events.list} +\usage{ +is.events.list(x, y) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{y}{A TRONCO event list} +} +\description{ +Check if y is a valid event list for x +} + diff --git a/man/is.logic.node.Rd b/man/is.logic.node.Rd new file mode 100644 index 00000000..d6bd7fdb --- /dev/null +++ b/man/is.logic.node.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{is.logic.node} +\alias{is.logic.node} +\title{is logical node} +\usage{ +is.logic.node(node) +} +\arguments{ +\item{node}{A node identifier} +} +\value{ +boolean +} +\description{ +Check if logic node down or up +} + diff --git a/man/is.logic.node.down.Rd b/man/is.logic.node.down.Rd new file mode 100644 index 00000000..4bbfd946 --- /dev/null +++ b/man/is.logic.node.down.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{is.logic.node.down} +\alias{is.logic.node.down} +\title{is logical node down} +\usage{ +is.logic.node.down(node) +} +\arguments{ +\item{node}{A node identifier} +} +\value{ +boolean +} +\description{ +Check if logic node down +} + diff --git a/man/is.logic.node.up.Rd b/man/is.logic.node.up.Rd new file mode 100644 index 00000000..76afc98a --- /dev/null +++ b/man/is.logic.node.up.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{is.logic.node.up} +\alias{is.logic.node.up} +\title{is logical node up} +\usage{ +is.logic.node.up(node) +} +\arguments{ +\item{node}{A node identifier} +} +\value{ +boolean +} +\description{ +Check if logic node up +} + diff --git a/man/is.model.Rd b/man/is.model.Rd new file mode 100644 index 00000000..6454b25e --- /dev/null +++ b/man/is.model.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/correctness.R +\name{is.model} +\alias{is.model} +\title{is.model} +\usage{ +is.model(x) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} +} +\description{ +Check if x is a valid TRONCO model +} + diff --git a/man/keysToNames.Rd b/man/keysToNames.Rd new file mode 100644 index 00000000..12f4044d --- /dev/null +++ b/man/keysToNames.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{keysToNames} +\alias{keysToNames} +\title{keysToNames} +\usage{ +keysToNames(x, matrix) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{matrix}{A matrix with colnames/rownames which represent genotypes keys.} +} +\value{ +The matrix with intelligible colnames/rownames. +} +\description{ +Convert colnames/rownames of a matrix into intelligible event names, e.g., change a key G23 in 'Mutation KRAS'. +If a name is not found, the original name is left unchanged. +} +\examples{ +data(test_model) +adj_matrix = as.adj.matrix(test_model, events=as.events(test_model)[5:15,])$bic +keysToNames(test_model, adj_matrix) +} + diff --git a/man/maf.Rd b/man/maf.Rd new file mode 100644 index 00000000..fbbf4a5f --- /dev/null +++ b/man/maf.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{maf} +\alias{maf} +\title{MAF example data} +\format{Manual Annotated Format} +\source{ +fake data +} +\usage{ +data(maf) +} +\description{ +This dataset contains a standard MAF input for TRONCO +} +\author{ +Luca De Sano +} + diff --git a/man/merge.events.Rd b/man/merge.events.Rd new file mode 100644 index 00000000..846aea7d --- /dev/null +++ b/man/merge.events.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/editing.functions.R +\name{merge.events} +\alias{merge.events} +\title{merge.events} +\usage{ +\method{merge}{events}(x, ..., new.event, new.type, event.color) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{...}{A list of events to merge} + +\item{new.event}{The name of the resultant event} + +\item{new.type}{The type of the new event} + +\item{event.color}{The color of the new event} +} +\value{ +A TRONCO compliant dataset. +} +\description{ +Merge a list of events in an unique event +} +\examples{ +data(muts) +dataset = merge.events(muts, 'G1', 'G2', new.event='test', new.type='banana', event.color='yellow') +} + diff --git a/man/merge.types.Rd b/man/merge.types.Rd new file mode 100644 index 00000000..7a6b6bd0 --- /dev/null +++ b/man/merge.types.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/editing.functions.R +\name{merge.types} +\alias{merge.types} +\title{merge.types} +\usage{ +\method{merge}{types}(x, ..., new.type = "new.type", new.color = "khaki") +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{...}{type to merge} + +\item{new.type}{label for the new type to create} + +\item{new.color}{color for the new type to create} +} +\value{ +A TRONCO compliant dataset. +} +\description{ +For an input dataset merge all the events of two or more distincit types +(e.g., say that missense and indel mutations are events +of a unique "mutation" type) +} +\examples{ +data(test_dataset_no_hypos) +merge.types(test_dataset_no_hypos, 'ins_del', 'missense_point_mutations') +merge.types(test_dataset_no_hypos, 'ins_del', 'missense_point_mutations', new.type='mut', new.color='green') +} + diff --git a/man/muts.Rd b/man/muts.Rd new file mode 100644 index 00000000..f48d968d --- /dev/null +++ b/man/muts.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{muts} +\alias{muts} +\title{Simple mutation dataset} +\format{TRONCO compliant dataset} +\source{ +fake data +} +\usage{ +data(muts) +} +\description{ +A simple mutation dataset without hypotheses +} +\author{ +Luca De Sano +} + diff --git a/man/nevents.Rd b/man/nevents.Rd new file mode 100644 index 00000000..b1c29a19 --- /dev/null +++ b/man/nevents.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{nevents} +\alias{nevents} +\title{nevents} +\usage{ +nevents(x, genes = NA, types = NA) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{genes}{The genes to consider, if NA all available genes are used.} + +\item{types}{The types of events to consider, if NA all available types are used.} +} +\value{ +The number of events in the dataset involving a certain gene or type of event. +} +\description{ +Return the number of events in the dataset involving a certain gene or type of event. +} +\examples{ +data(test_dataset) +nevents(test_dataset) +} + diff --git a/man/ngenes.Rd b/man/ngenes.Rd new file mode 100644 index 00000000..8f0eef4b --- /dev/null +++ b/man/ngenes.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{ngenes} +\alias{ngenes} +\title{ngenes} +\usage{ +ngenes(x, types = NA) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{types}{The types of events to consider, if NA all available types are used.} +} +\value{ +The number of genes in the dataset involving a certain type of event. +} +\description{ +Return the number of genes in the dataset involving a certain type of event. +} +\examples{ +data(test_dataset) +ngenes(test_dataset) +} + diff --git a/man/nhypotheses.Rd b/man/nhypotheses.Rd new file mode 100644 index 00000000..759c436f --- /dev/null +++ b/man/nhypotheses.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{nhypotheses} +\alias{nhypotheses} +\title{Return the number of hypotheses in the dataset} +\usage{ +nhypotheses(x) +} +\arguments{ +\item{x}{the dataset.} +} +\description{ +Return the number of hypotheses in the dataset +} +\examples{ +data(test_dataset) +nhypotheses(test_dataset) +} + diff --git a/man/npatterns.Rd b/man/npatterns.Rd new file mode 100644 index 00000000..8424cf3b --- /dev/null +++ b/man/npatterns.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{npatterns} +\alias{npatterns} +\title{Return the number of patterns in the dataset} +\usage{ +npatterns(x) +} +\arguments{ +\item{x}{the dataset.} +} +\description{ +Return the number of patterns in the dataset +} +\examples{ +data(test_dataset) +npatterns(test_dataset) +} + diff --git a/man/nsamples.Rd b/man/nsamples.Rd new file mode 100644 index 00000000..968949dd --- /dev/null +++ b/man/nsamples.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{nsamples} +\alias{nsamples} +\title{nsamples} +\usage{ +nsamples(x) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} +} +\value{ +The number of samples in the dataset. +} +\description{ +Return the number of samples in the dataset. +} +\examples{ +data(test_dataset) +nsamples(test_dataset) +} + diff --git a/man/ntypes.Rd b/man/ntypes.Rd new file mode 100644 index 00000000..181ce17c --- /dev/null +++ b/man/ntypes.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{ntypes} +\alias{ntypes} +\title{ntypes} +\usage{ +ntypes(x) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} +} +\value{ +The number of types in the dataset. +} +\description{ +Return the number of types in the dataset. +} +\examples{ +data(test_dataset) +ntypes(test_dataset) +} + diff --git a/man/oncoprint.Rd b/man/oncoprint.Rd new file mode 100644 index 00000000..f9762778 --- /dev/null +++ b/man/oncoprint.Rd @@ -0,0 +1,87 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/visualization.R +\name{oncoprint} +\alias{oncoprint} +\title{oncoprint} +\usage{ +oncoprint(x, excl.sort = TRUE, samples.cluster = FALSE, + genes.cluster = FALSE, file = NA, ann.stage = has.stages(x), + ann.hits = TRUE, stage.color = "YlOrRd", hits.color = "Purples", + null.color = "lightgray", border.color = "white", text.cex = 1, + font.column = NA, font.row = NA, title = as.description(x), + sample.id = FALSE, hide.zeroes = FALSE, legend = TRUE, + legend.cex = 0.5, cellwidth = NA, cellheight = NA, + group.by.label = FALSE, group.by.stage = FALSE, group.samples = NA, + gene.annot = NA, gene.annot.color = "Set1", show.patterns = FALSE, + annotate.consolidate.events = FALSE, txt.stats = paste(nsamples(x), + " samples\\n", nevents(x), " events\\n", ngenes(x), " genes\\n", npatterns(x), + " patterns", sep = ""), ...) +} +\arguments{ +\item{x}{A TRONCO compliant dataset} + +\item{excl.sort}{Boolean value, if TRUE sorts samples to enhance exclusivity of alterations} + +\item{samples.cluster}{Boolean value, if TRUE clusters samples (columns). Default FALSE} + +\item{genes.cluster}{Boolean value, if TRUE clusters genes (rows). Default FALSE} + +\item{file}{If not NA write to \code{file} the Oncoprint, default is NA (just visualization).} + +\item{ann.stage}{Boolean value to annotate stage classification, default depends on \code{x}} + +\item{ann.hits}{Boolean value to annotate the number of events in each sample, default is TRUE} + +\item{stage.color}{RColorbrewer palette to color stage annotations. Default is 'YlOrRd'} + +\item{hits.color}{RColorbrewer palette to color hits annotations. Default is 'Purples'} + +\item{null.color}{Color for the Oncoprint cells with 0s, default is 'lightgray'} + +\item{border.color}{Border color for the Oncoprint, default is white' (no border)} + +\item{text.cex}{Title and annotations cex, multiplied by font size 7} + +\item{font.column}{If NA, half of font.row is used} + +\item{font.row}{If NA, max(c(15 * exp(-0.02 * nrow(data)), 2)) is used, where data is the data +visualized in the Oncoprint} + +\item{title}{Oncoprint title, default is as.name(x) - see \code{as.name}} + +\item{sample.id}{If TRUE shows samples name (columns). Default is FALSE} + +\item{hide.zeroes}{If TRUE trims data - see \code{trim} - before plot. Default is FALSE} + +\item{legend}{If TRUE shows a legend for the types of events visualized. Defualt is TRUE} + +\item{legend.cex}{Default 0.5; determines legend size if \code{legend = TRUE}} + +\item{cellwidth}{Default NA, sets autoscale cell width} + +\item{cellheight}{Default NA, sets autoscale cell height} + +\item{group.by.label}{Sort samples (rows) by event label - usefull when multiple events per gene are +available} + +\item{group.by.stage}{Default FALSE; sort samples by stage.} + +\item{group.samples}{If this samples -> group map is provided, samples are grouped as of groups +and sorted according to the number of mutations per sample - usefull when \code{data} was clustered} + +\item{gene.annot}{Genes'groups, e.g. list(RAF=c('KRAS','NRAS'), Wnt=c('APC', 'CTNNB1')). Default is NA.} + +\item{gene.annot.color}{Either a RColorColorbrewer palette name or a set of custom colors matching names(gene.annot)} + +\item{show.patterns}{If TRUE shows also a separate oncoprint for each pattern. Default is FALSE} + +\item{annotate.consolidate.events}{Default is FALSE. If TRUE an annotation for events to consolidate is shown.} + +\item{txt.stats}{By default, shows a summary statistics for shown data (n,m, |G| and |P|)} + +\item{...}{other arguments to pass to pheatmap} +} +\description{ +oncoprint +} + diff --git a/man/oncoprint.cbio.Rd b/man/oncoprint.cbio.Rd new file mode 100644 index 00000000..482676f8 --- /dev/null +++ b/man/oncoprint.cbio.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/visualization.R +\name{oncoprint.cbio} +\alias{oncoprint.cbio} +\title{oncoprint.cbio} +\usage{ +oncoprint.cbio(x, file = "oncoprint-cbio.txt", hom.del = "Homozygous Loss", + het.loss = "Heterozygous Loss", gain = "Low-level Gain", + amp = "High-level Gain") +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{file}{name of the file where to save the output} + +\item{hom.del}{type of Homozygous Deletion} + +\item{het.loss}{type of Heterozygous Loss} + +\item{gain}{type of Gain} + +\item{amp}{type of Amplification} +} +\value{ +A file containing instruction for the CBio visualization Tool +} +\description{ +export input for cbio visualization at http://www.cbioportal.org/public-portal/oncoprinter.jsp +} +\examples{ +data(gistic) +gistic = import.GISTIC(gistic) +oncoprint.cbio(gistic) +} + diff --git a/man/ov.cgh.Rd b/man/ov.cgh.Rd deleted file mode 100644 index 22b89897..00000000 --- a/man/ov.cgh.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2 (4.0.2): do not edit by hand -\docType{data} -\name{ov.cgh} -\alias{ov.cgh} -\title{Ovarian cancer CGH data} -\format{A data frame with 87 observations on 7 variables.} -\source{ -\url{http://www.ncbi.nlm.nih.gov/sky/} -} -\description{ -This is a data set obtained using the comparative genomic -hybridization technique (CGH) on sam- ples from papillary serous -cystadenocarcinoma of the ovary. Only the seven most commonly -occurring events are given. -} -\details{ -The CGH technique uses fluorescent staining to detect abnormal -(increased or decreased) number of DNA copies. Often the results -are reported as a gain or loss on a certain arm, without further - distinction for specific regions. It is common to denote a change - in DNA copy number on a specific chromosome arm by prefixing a "-" - sign for decrease and a "+" for increase. Thus, say, -3q denotes - abnormally low DNA copy number on the q arm of the 3rd chromosome. -} - diff --git a/man/pathway.visualization.Rd b/man/pathway.visualization.Rd new file mode 100644 index 00000000..d7300cfb --- /dev/null +++ b/man/pathway.visualization.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/visualization.R +\name{pathway.visualization} +\alias{pathway.visualization} +\title{pathway.visualization} +\usage{ +pathway.visualization(x, title = paste("Pathways:", paste(names(pathways), + collapse = ", ", sep = "")), file, pathways.color = "Set2", + aggregate.pathways, pathways, ...) +} +\arguments{ +\item{x}{A TRONCO complian dataset} + +\item{title}{Plot title} + +\item{file}{To generate a PDF a filename have to be given} + +\item{pathways.color}{A RColorBrewer color palette} + +\item{aggregate.pathways}{todo} + +\item{pathways}{todo} + +\item{...}{todo} +} +\value{ +plot information +} +\description{ +Visualise pathways informations +} + diff --git a/man/perform.likelihood.fit.Rd b/man/perform.likelihood.fit.Rd new file mode 100644 index 00000000..7eeddb23 --- /dev/null +++ b/man/perform.likelihood.fit.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.algorithm.R +\name{perform.likelihood.fit} +\alias{perform.likelihood.fit} +\title{perform.likelihood.fit} +\usage{ +perform.likelihood.fit(dataset, adj.matrix, command, regularization) +} +\arguments{ +\item{dataset}{a valid dataset} + +\item{adj.matrix}{the adjacency matrix of the prima facie causes} + +\item{command}{type of search, either hill climbing (hc) or tabu (tabu)} + +\item{regularization}{regularization term to be used in the likelihood fit} +} +\value{ +topology: the adjacency matrix of both the prima facie and causal topologies +} +\description{ +reconstruct the best causal topology by likelihood fit +} + diff --git a/man/pheatmap.Rd b/man/pheatmap.Rd new file mode 100644 index 00000000..3bd27c55 --- /dev/null +++ b/man/pheatmap.Rd @@ -0,0 +1,253 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/visualization.R +\name{pheatmap} +\alias{pheatmap} +\title{A function to draw clustered heatmaps.} +\usage{ +pheatmap(mat, color = colorRampPalette(rev(brewer.pal(n = 7, name = + "RdYlBu")))(100), kmeans_k = NA, breaks = NA, border_color = "grey60", + cellwidth = NA, cellheight = NA, scale = "none", cluster_rows = TRUE, + cluster_cols = TRUE, clustering_distance_rows = "euclidean", + clustering_distance_cols = "euclidean", clustering_method = "complete", + cutree_rows = NA, cutree_cols = NA, + treeheight_row = ifelse(cluster_rows, 50, 0), + treeheight_col = ifelse(cluster_cols, 50, 0), legend = TRUE, + legend_breaks = NA, legend_labels = NA, annotation_row = NA, + annotation_col = NA, annotation = NA, annotation_colors = NA, + annotation_legend = TRUE, drop_levels = TRUE, show_rownames = T, + show_colnames = T, main = NA, fontsize = 10, fontsize_row = fontsize, + fontsize_col = fontsize, display_numbers = F, number_format = "\%.2f", + number_color = "grey30", fontsize_number = 0.8 * fontsize, + gaps_row = NULL, gaps_col = NULL, labels_row = NULL, + labels_col = NULL, filename = NA, width = NA, height = NA, + silent = FALSE, legend.cex = 1, txt.stats = NA, ...) +} +\arguments{ +\item{mat}{numeric matrix of the values to be plotted.} + +\item{color}{vector of colors used in heatmap.} + +\item{kmeans_k}{the number of kmeans clusters to make, if we want to agggregate the +rows before drawing heatmap. If NA then the rows are not aggregated.} + +\item{breaks}{a sequence of numbers that covers the range of values in mat and is one +element longer than color vector. Used for mapping values to colors. Useful, if needed +to map certain values to certain colors, to certain values. If value is NA then the +breaks are calculated automatically.} + +\item{border_color}{color of cell borders on heatmap, use NA if no border should be +drawn.} + +\item{cellwidth}{individual cell width in points. If left as NA, then the values +depend on the size of plotting window.} + +\item{cellheight}{individual cell height in points. If left as NA, +then the values depend on the size of plotting window.} + +\item{scale}{character indicating if the values should be centered and scaled in +either the row direction or the column direction, or none. Corresponding values are +\code{"row"}, \code{"column"} and \code{"none"}} + +\item{cluster_rows}{boolean values determining if rows should be clustered,} + +\item{cluster_cols}{boolean values determining if columns should be clustered.} + +\item{clustering_distance_rows}{distance measure used in clustering rows. Possible +values are \code{"correlation"} for Pearson correlation and all the distances +supported by \code{\link{dist}}, such as \code{"euclidean"}, etc. If the value is none +of the above it is assumed that a distance matrix is provided.} + +\item{clustering_distance_cols}{distance measure used in clustering columns. Possible +values the same as for clustering_distance_rows.} + +\item{clustering_method}{clustering method used. Accepts the same values as +\code{\link{hclust}}.} + +\item{cutree_rows}{number of clusters the rows are divided into, based on the + hierarchical clustering (using cutree), if rows are not clustered, the +argument is ignored} + +\item{cutree_cols}{similar to \code{cutree_rows}, but for columns} + +\item{treeheight_row}{the height of a tree for rows, if these are clustered. +Default value 50 points.} + +\item{treeheight_col}{the height of a tree for columns, if these are clustered. +Default value 50 points.} + +\item{legend}{logical to determine if legend should be drawn or not.} + +\item{legend_breaks}{vector of breakpoints for the legend.} + +\item{legend_labels}{vector of labels for the \code{legend_breaks}.} + +\item{annotation_row}{data frame that specifies the annotations shown on left + side of the heatmap. Each row defines the features for a specific row. The +rows in the data and in the annotation are matched using corresponding row + names. Note that color schemes takes into account if variable is continuous + or discrete.} + +\item{annotation_col}{similar to annotation_row, but for columns.} + +\item{annotation}{deprecated parameter that currently sets the annotation_col if it is missing} + +\item{annotation_colors}{list for specifying annotation_row and +annotation_col track colors manually. It is possible to define the colors +for only some of the features. Check examples for details.} + +\item{annotation_legend}{boolean value showing if the legend for annotation +tracks should be drawn.} + +\item{drop_levels}{logical to determine if unused levels are also shown in +the legend} + +\item{show_rownames}{boolean specifying if column names are be shown.} + +\item{show_colnames}{boolean specifying if column names are be shown.} + +\item{main}{the title of the plot} + +\item{fontsize}{base fontsize for the plot} + +\item{fontsize_row}{fontsize for rownames (Default: fontsize)} + +\item{fontsize_col}{fontsize for colnames (Default: fontsize)} + +\item{display_numbers}{logical determining if the numeric values are also printed to +the cells. If this is a matrix (with same dimensions as original matrix), the contents +of the matrix are shown instead of original values.} + +\item{number_format}{format strings (C printf style) of the numbers shown in cells. +For example "\code{\%.2f}" shows 2 decimal places and "\code{\%.1e}" shows exponential +notation (see more in \code{\link{sprintf}}).} + +\item{number_color}{color of the text} + +\item{fontsize_number}{fontsize of the numbers displayed in cells} + +\item{gaps_row}{vector of row indices that show shere to put gaps into +heatmap. Used only if the rows are not clustered. See \code{cutree_row} +to see how to introduce gaps to clustered rows.} + +\item{gaps_col}{similar to gaps_row, but for columns.} + +\item{labels_row}{custom labels for rows that are used instead of rownames.} + +\item{labels_col}{similar to labels_row, but for columns.} + +\item{filename}{file path where to save the picture. Filetype is decided by +the extension in the path. Currently following formats are supported: png, pdf, tiff, + bmp, jpeg. Even if the plot does not fit into the plotting window, the file size is +calculated so that the plot would fit there, unless specified otherwise.} + +\item{width}{manual option for determining the output file width in inches.} + +\item{height}{manual option for determining the output file height in inches.} + +\item{silent}{do not draw the plot (useful when using the gtable output)} + +\item{legend.cex}{Default 0.5; determines legend size if \code{legend = TRUE}} + +\item{txt.stats}{By default, shows a summary statistics for shown data (n,m, |G| and |P|)} + +\item{\dots}{graphical parameters for the text used in plot. Parameters passed to +\code{\link{grid.text}}, see \code{\link{gpar}}.} +} +\value{ +Invisibly a list of components +\itemize{ + \item \code{tree_row} the clustering of rows as \code{\link{hclust}} object + \item \code{tree_col} the clustering of columns as \code{\link{hclust}} object + \item \code{kmeans} the kmeans clustering of rows if parameter \code{kmeans_k} was +specified +} +} +\description{ +A function to draw clustered heatmaps where one has better control over some graphical +parameters such as cell size, etc. +} +\details{ +The function also allows to aggregate the rows using kmeans clustering. This is +advisable if number of rows is so big that R cannot handle their hierarchical +clustering anymore, roughly more than 1000. Instead of showing all the rows +separately one can cluster the rows in advance and show only the cluster centers. +The number of clusters can be tuned with parameter kmeans_k. +} +\examples{ +# Create test matrix +test = matrix(rnorm(200), 20, 10) +test[1:10, seq(1, 10, 2)] = test[1:10, seq(1, 10, 2)] + 3 +test[11:20, seq(2, 10, 2)] = test[11:20, seq(2, 10, 2)] + 2 +test[15:20, seq(2, 10, 2)] = test[15:20, seq(2, 10, 2)] + 4 +colnames(test) = paste("Test", 1:10, sep = "") +rownames(test) = paste("Gene", 1:20, sep = "") + +# Draw heatmaps +pheatmap(test) +pheatmap(test, kmeans_k = 2) +pheatmap(test, scale = "row", clustering_distance_rows = "correlation") +pheatmap(test, color = colorRampPalette(c("navy", "white", "firebrick3"))(50)) +pheatmap(test, cluster_row = FALSE) +pheatmap(test, legend = FALSE) + +# Show text within cells +pheatmap(test, display_numbers = TRUE) +pheatmap(test, display_numbers = TRUE, number_format = "\\\%.1e") +pheatmap(test, display_numbers = matrix(ifelse(test > 5, "*", ""), nrow(test))) +pheatmap(test, cluster_row = FALSE, legend_breaks = -1:4, legend_labels = c("0", +"1e-4", "1e-3", "1e-2", "1e-1", "1")) + +# Fix cell sizes and save to file with correct size +pheatmap(test, cellwidth = 15, cellheight = 12, main = "Example heatmap") +pheatmap(test, cellwidth = 15, cellheight = 12, fontsize = 8, filename = "test.pdf") + +# Generate annotations for rows and columns +annotation_col = data.frame( + CellType = factor(rep(c("CT1", "CT2"), 5)), + Time = 1:5 + ) +rownames(annotation_col) = paste("Test", 1:10, sep = "") + +annotation_row = data.frame( + GeneClass = factor(rep(c("Path1", "Path2", "Path3"), c(10, 4, 6))) + ) +rownames(annotation_row) = paste("Gene", 1:20, sep = "") + +# Display row and color annotations +pheatmap(test, annotation_col = annotation_col) +pheatmap(test, annotation_col = annotation_col, annotation_legend = FALSE) +pheatmap(test, annotation_col = annotation_col, annotation_row = annotation_row) + + +# Specify colors +ann_colors = list( + Time = c("white", "firebrick"), + CellType = c(CT1 = "#1B9E77", CT2 = "#D95F02"), + GeneClass = c(Path1 = "#7570B3", Path2 = "#E7298A", Path3 = "#66A61E") +) + +pheatmap(test, annotation_col = annotation_col, annotation_colors = ann_colors, main = "Title") +pheatmap(test, annotation_col = annotation_col, annotation_row = annotation_row, + annotation_colors = ann_colors) +pheatmap(test, annotation_col = annotation_col, annotation_colors = ann_colors[2]) + +# Gaps in heatmaps +pheatmap(test, annotation_col = annotation_col, cluster_rows = FALSE, gaps_row = c(10, 14)) +pheatmap(test, annotation_col = annotation_col, cluster_rows = FALSE, gaps_row = c(10, 14), + cutree_col = 2) + +# Show custom strings as row/col names +labels_row = c("", "", "", "", "", "", "", "", "", "", "", "", "", "", "", +"", "", "Il10", "Il15", "Il1b") + +pheatmap(test, annotation_col = annotation_col, labels_row = labels_row) + +# Specifying clustering from distance matrix +drows = dist(test, method = "minkowski") +dcols = dist(t(test), method = "minkowski") +pheatmap(test, clustering_distance_rows = drows, clustering_distance_cols = dcols) +} +\author{ +Raivo Kolde +} + diff --git a/man/rank.recurrents.Rd b/man/rank.recurrents.Rd new file mode 100644 index 00000000..b19cdb69 --- /dev/null +++ b/man/rank.recurrents.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/selection.R +\name{rank.recurrents} +\alias{rank.recurrents} +\title{rank.recurrents} +\usage{ +rank.recurrents(x, n) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{n}{The number of events to rank} +} +\value{ +the first n recurrent events +} +\description{ +Return the first n recurrent events +} +\examples{ +data(test_dataset) +dataset = rank.recurrents(test_dataset, 10) +} + diff --git a/man/remove.cycles.Rd b/man/remove.cycles.Rd new file mode 100644 index 00000000..d30262c1 --- /dev/null +++ b/man/remove.cycles.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.algorithm.R +\name{remove.cycles} +\alias{remove.cycles} +\title{remove.cycles} +\usage{ +remove.cycles(adj.matrix, weights.temporal.priority, weights.matrix, + not.ordered, hypotheses = NA, silent) +} +\arguments{ +\item{adj.matrix}{adjacency matrix of the topology} + +\item{weights.temporal.priority}{weighted matrix to be used to remove the cycles involving atomic events} + +\item{weights.matrix}{weighted matrix to be used to remove the cycles involving hypotheses} + +\item{not.ordered}{list of the nodes to be orderd} + +\item{hypotheses}{hypotheses to evaluate potential cycles} + +\item{silent}{Should I be verbose?} +} +\value{ +acyclic.topology: structure representing the best acyclic topology +} +\description{ +remove any cycle from a given cyclic topology +} + diff --git a/man/rename.gene.Rd b/man/rename.gene.Rd new file mode 100644 index 00000000..1f871c81 --- /dev/null +++ b/man/rename.gene.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/editing.functions.R +\name{rename.gene} +\alias{rename.gene} +\title{rename.gene} +\usage{ +rename.gene(x, old.name, new.name) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{old.name}{The name of the gene to rename.} + +\item{new.name}{The new name} +} +\value{ +A TRONCO complian dataset. +} +\description{ +Rename a gene +} +\examples{ +data(test_dataset) +test_dataset = rename.gene(test_dataset, 'TET2', 'gene x') +} + diff --git a/man/rename.type.Rd b/man/rename.type.Rd new file mode 100644 index 00000000..793f551e --- /dev/null +++ b/man/rename.type.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/editing.functions.R +\name{rename.type} +\alias{rename.type} +\title{rename.type} +\usage{ +rename.type(x, old.name, new.name) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{old.name}{The type of event to rename.} + +\item{new.name}{The new name} +} +\value{ +A TRONCO complian dataset. +} +\description{ +Rename an event type +} +\examples{ +data(test_dataset) +test_dataset = rename.type(test_dataset, 'ins_del', 'deletion') +} + diff --git a/man/reset.Rd b/man/reset.Rd deleted file mode 100644 index 4fe28fdb..00000000 --- a/man/reset.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2 (4.0.2): do not edit by hand -\name{reset} -\alias{reset} -\alias{reset.events} -\alias{reset.types} -\title{reset} -\usage{ -reset.events() - -reset.types() - -reset() -} -\description{ -A set of functions to reset events, types and data.values variables -} -\details{ -\code{reset.events} Resets the events variable - -\code{reset.types()} Resets the types variable - -\code{reset()} Resets types, events and data.values variables -} -\examples{ -reset.events() -reset.types() -reset() -} - diff --git a/man/samples.selection.Rd b/man/samples.selection.Rd new file mode 100644 index 00000000..9732c450 --- /dev/null +++ b/man/samples.selection.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/selection.R +\name{samples.selection} +\alias{samples.selection} +\title{samples.selection} +\usage{ +samples.selection(x, samples) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{samples}{A list of samples} +} +\value{ +A TRONCO compliant dataset. +} +\description{ +Filter a dataset based on selected samples id +} +\examples{ +data(test_dataset) +dataset = samples.selection(test_dataset, c('patient 1', 'patient 2')) +} + diff --git a/man/sbind.Rd b/man/sbind.Rd new file mode 100644 index 00000000..d87789c2 --- /dev/null +++ b/man/sbind.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/editing.functions.R +\name{sbind} +\alias{sbind} +\title{sbind} +\usage{ +sbind(...) +} +\arguments{ +\item{...}{the input datasets} +} +\value{ +A TRONCO complian dataset. +} +\description{ +Binds samples from one or more datasets, which must be defined over the same set of events +} + diff --git a/man/show.Rd b/man/show.Rd new file mode 100644 index 00000000..40bd4c3e --- /dev/null +++ b/man/show.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{show} +\alias{show} +\title{show} +\usage{ +show(x, view = 10) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{view}{The firse \code{view} events are shown via \code{head}.} +} +\description{ +Print to console a short report of a dataset 'x', which should be +a TRONCO compliant dataset - see \code{is.compliant}. +} +\examples{ +data(test_dataset) +show(test_dataset) +} + diff --git a/man/sort.by.frequency.Rd b/man/sort.by.frequency.Rd new file mode 100644 index 00000000..69a32736 --- /dev/null +++ b/man/sort.by.frequency.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/as.functions.R +\name{sort.by.frequency} +\alias{sort.by.frequency} +\title{sort.by.frequency} +\usage{ +\method{sort}{by.frequency}(x, decreasing = TRUE, ...) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{decreasing}{Inverse order. Default TRUE} + +\item{...}{just for compatibility} +} +\value{ +A TRONCO compliant dataset with the internal genotypes sorted according to event frequency. +} +\description{ +Sort the internal genotypes according to event frequency. +} +\examples{ +data(test_dataset) +sort.by.frequency(test_dataset) +} + diff --git a/man/ssplit.Rd b/man/ssplit.Rd new file mode 100644 index 00000000..29ee70e7 --- /dev/null +++ b/man/ssplit.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/editing.functions.R +\name{ssplit} +\alias{ssplit} +\title{ssplit} +\usage{ +ssplit(x, clusters, idx = NA) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{clusters}{A list of clusters. Rownames must match samples list of x} + +\item{idx}{ID of a specific group present in stages. If NA all groups will be extracted} +} +\value{ +A TRONCO compliant dataset. +} +\description{ +Split cohort (samples) into groups, return either all groups or a specific group. +} + diff --git a/man/stage.Rd b/man/stage.Rd new file mode 100644 index 00000000..392ba0ca --- /dev/null +++ b/man/stage.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{stage} +\alias{stage} +\title{Stage information for test_dataset} +\format{Vector of stages} +\source{ +fake data +} +\usage{ +data(stage) +} +\description{ +This dataset contains stage information for patient in test_dataset +} +\author{ +Luca De Sano +} + diff --git a/man/test_dataset.Rd b/man/test_dataset.Rd new file mode 100644 index 00000000..93c58557 --- /dev/null +++ b/man/test_dataset.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{test_dataset} +\alias{test_dataset} +\title{A complete dataset with hypotheses} +\format{TRONCO compliant dataset} +\source{ +fake data +} +\usage{ +data(test_dataset) +} +\description{ +This dataset contains a complete test dataset +} +\author{ +Luca De Sano +} + diff --git a/man/test_dataset_no_hypos.Rd b/man/test_dataset_no_hypos.Rd new file mode 100644 index 00000000..4c989bda --- /dev/null +++ b/man/test_dataset_no_hypos.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{test_dataset_no_hypos} +\alias{test_dataset_no_hypos} +\title{A complete dataset} +\format{TRONCO compliant dataset} +\source{ +fake data +} +\usage{ +data(test_dataset_no_hypos) +} +\description{ +This dataset contains a complete test dataset +} +\author{ +Luca De Sano +} + diff --git a/man/test_model.Rd b/man/test_model.Rd new file mode 100644 index 00000000..5f71252e --- /dev/null +++ b/man/test_model.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{test_model} +\alias{test_model} +\title{A complete dataset with a reconstructed model} +\format{TRONCO compliant dataset} +\source{ +fake data +} +\usage{ +data(test_model) +} +\description{ +This dataset contains a model reconstructed with CAPRI +} +\author{ +Luca De Sano +} + diff --git a/man/trim.Rd b/man/trim.Rd new file mode 100644 index 00000000..62d1559f --- /dev/null +++ b/man/trim.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/editing.functions.R +\name{trim} +\alias{trim} +\title{trim} +\usage{ +trim(x) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} +} +\value{ +A TRONCO compliant dataset. +} +\description{ +Deletes all events which have frequency 0 in the dataset. +} +\examples{ +data(test_dataset) +test_dataset = trim(test_dataset) +} + diff --git a/man/tronco.bootstrap.Rd b/man/tronco.bootstrap.Rd index ac8a04ed..6a62e413 100644 --- a/man/tronco.bootstrap.Rd +++ b/man/tronco.bootstrap.Rd @@ -1,26 +1,31 @@ -% Generated by roxygen2 (4.0.2): do not edit by hand +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/tronco.R \name{tronco.bootstrap} \alias{tronco.bootstrap} -\title{perform bootstrap algorithm} +\title{tronco bootstrap} \usage{ -tronco.bootstrap(topology, lambda = 0.5, type = "non-parametric", - nboot = 1000) +tronco.bootstrap(reconstruction, type = "non-parametric", nboot = 100, + verbose = FALSE) } \arguments{ -\item{topology}{A topology returned by a reconstruction algorithm} +\item{reconstruction}{The output of tronco.capri or tronco.caprese} -\item{lambda}{A lambda value, default is 0.5} +\item{type}{Parameter to define the type of sampling to be performed, e.g., non-parametric for uniform sampling.} -\item{type}{The type of bootstrap performed, parametric and non parametric types are available. -To specify wich type of bootstrap run type must be "parametric" or "non-parametric".} +\item{nboot}{Number of bootstrap sampling to be performed when estimating the model confidence.} -\item{nboot}{Samplig value. The grater will be the nboot value the logehr time the -entire process will take to complete the computing} +\item{verbose}{Should I be verbose?} } \value{ -A topology object with bootstrap informations added +A TRONCO compliant object with reconstructed model } \description{ -\code{tronco.bootstrap} perform parametric and non-parametric bootstrap algorithms +Bootstrap a reconstructed progression model +} +\examples{ +data(test_dataset) +recon = tronco.capri(test_dataset) +boot = tronco.bootstrap(recon, nboot=5) +tronco.plot(boot) } diff --git a/man/tronco.bootstrap.show.Rd b/man/tronco.bootstrap.show.Rd deleted file mode 100644 index 05a8e07f..00000000 --- a/man/tronco.bootstrap.show.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2 (4.0.2): do not edit by hand -\name{tronco.bootstrap.show} -\alias{tronco.bootstrap.show} -\title{show bootstrapping results} -\usage{ -tronco.bootstrap.show(topology) -} -\arguments{ -\item{topology}{A topology returned by a reconstruction algorithm} -} -\description{ -\code{tronco.bootstrap.show} show bootstrapping results. Requires that you already executed tronco.bootstrap -} - diff --git a/man/tronco.caprese.Rd b/man/tronco.caprese.Rd index ee901d6c..dcb1ff60 100644 --- a/man/tronco.caprese.Rd +++ b/man/tronco.caprese.Rd @@ -1,27 +1,29 @@ -% Generated by roxygen2 (4.0.2): do not edit by hand +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/tronco.R \name{tronco.caprese} \alias{tronco.caprese} -\title{runs CAPRESE algorithm} +\title{tronco caprese} \usage{ -tronco.caprese(dataset, lambda = 0.5, verbose = FALSE) +tronco.caprese(data, lambda = 0.5, do.estimation = FALSE, silent = FALSE) } \arguments{ -\item{dataset}{The input dataset. Type: dataframe. The dataset given as input is the data.values data frame loaded by the \code{data} function.} +\item{data}{A TRONCO compliant dataset.} -\item{lambda}{the real positive value of the shrinkage coefficient, required to range in [0, 1]. Its default value is 0.5, if unspecified.} +\item{lambda}{Coefficient to combine the raw estimate with a correction factor into a shrinkage estimator.} -\item{verbose}{execute CAPRESE algorithm with verbose output to screen. Type: boolean, dafault: FALSE.} +\item{do.estimation}{A parameter to disable/enable the estimation of the error rates give the reconstructed model.} + +\item{silent}{A parameter to disable/enable verbose messages.} } \value{ -an object containing the reconstructed topology and confidence values. +A TRONCO compliant object with reconstructed model } \description{ -\code{tronco.caprese} executes the CAPRESE algorithm on the dataset \code{data.values} specified. -} -\details{ -\code{tronco.caprese} executes the reconstruction of the topology, and computesg all the confidence measures defined in \code{confidence}. +Reconstruct a progression model using CAPRESE algorithm } -\seealso{ -\code{\link{data}} +\examples{ +data(test_dataset) +recon = tronco.caprese(test_dataset) +tronco.plot(recon) } diff --git a/man/tronco.capri.Rd b/man/tronco.capri.Rd new file mode 100644 index 00000000..0c07c43a --- /dev/null +++ b/man/tronco.capri.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/tronco.R +\name{tronco.capri} +\alias{tronco.capri} +\title{tronco capri} +\usage{ +tronco.capri(data, command = "hc", regularization = c("bic", "aic"), + do.boot = TRUE, nboot = 100, pvalue = 0.05, min.boot = 3, + min.stat = TRUE, boot.seed = NULL, do.estimation = FALSE, + silent = FALSE) +} +\arguments{ +\item{data}{A TRONCO compliant dataset.} + +\item{command}{Parameter to define to heuristic search to be performed. Hill Climbing and Tabu search are currently available.} + +\item{regularization}{Select the regularization for the likelihood estimation, e.g., BIC, AIC.} + +\item{do.boot}{A parameter to disable/enable the estimation of the error rates give the reconstructed model.} + +\item{nboot}{Number of bootstrap sampling (with rejection) to be performed when estimating the selective advantage scores.} + +\item{pvalue}{Pvalue to accept/reject the valid selective advantage relations.} + +\item{min.boot}{Minimum number of bootstrap sampling to be performed.} + +\item{min.stat}{A parameter to disable/enable the minimum number of bootstrap sampling required besides nboot if any sampling is rejected.} + +\item{boot.seed}{Initial seed for the bootstrap random sampling.} + +\item{do.estimation}{A parameter to disable/enable the estimation of the error rates give the reconstructed model.} + +\item{silent}{A parameter to disable/enable verbose messages.} +} +\value{ +A TRONCO compliant object with reconstructed model +} +\description{ +Reconstruct a progression model using CAPRI algorithm +} +\examples{ +data(test_dataset) +recon = tronco.capri(test_dataset) +tronco.plot(recon) +} + diff --git a/man/tronco.plot.Rd b/man/tronco.plot.Rd index 99cd6e50..ea6b661b 100644 --- a/man/tronco.plot.Rd +++ b/man/tronco.plot.Rd @@ -1,53 +1,81 @@ -% Generated by roxygen2 (4.0.2): do not edit by hand +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/tronco.R \name{tronco.plot} \alias{tronco.plot} -\title{plot a progression model} +\title{tronco.plot} \usage{ -tronco.plot(topology, title = paste("Progression model", topology@algorithm, - sep = " "), title.color = "black", confidence = FALSE, legend = TRUE, - legend.title = "Legend", legend.columns = 1, legend.inline = FALSE, - legend.pos = "bottomright", legend.coeff = 1, label.coeff = 1, - label.color = "black", label.edge.size = 12) +tronco.plot(x, regularization = names(x$model), fontsize = NA, height = 2, + width = 3, height.logic = 1, pf = FALSE, disconnected = FALSE, + scale.nodes = NA, title = as.description(x), confidence = NA, + p.min = x$parameters$pvalue, legend = TRUE, legend.cex = 1, + edge.cex = 1, label.edge.size = NA, expand = TRUE, genes = NULL, + relations.filter = NA, edge.color = "black", pathways.color = "Set1", + file = NA, legend.pos = "bottom", pathways = NULL, lwd = 3, + annotate.sample = NA, ...) } \arguments{ -\item{topology}{A topology returned by a reconstruction algorithm} +\item{x}{A reconstructed model (the output of tronco.capri or tronco.caprese)} -\item{title}{plot Plot title (default "Progression model x", x reconstruction algorithm)} +\item{regularization}{A vector containing the names of regularizators used (BIC or AIC)} -\item{title.color}{color title (default "black")} +\item{fontsize}{For node names. Default NA for automatic rescaling} -\item{confidence}{bool; plot edges according to confidence (default is f)} +\item{height}{Proportion node height - node width. Default height 2} -\item{legend}{bool; show/hide the legend (default is t)} +\item{width}{Proportion node height - node width. Default width 2} -\item{legend.title}{string; legend title (default is "Legend")} +\item{height.logic}{Height of logical nodes. Defaul 1} -\item{legend.columns}{int; use 1 or 2 columns to plot the legend (default is 1)} +\item{pf}{Should I print Prima Facie? Default False} -\item{legend.inline}{bool; print inline legend (default is f)} +\item{disconnected}{Should I print disconnected nodes? Default False} -\item{legend.pos}{string; legend positioning, available keywords "topleft", "topright", -"bottom- left" and "bottomright" (default is "bottomright")} +\item{scale.nodes}{Node scaling coefficient (based on node frequency). Default NA (autoscale)} -\item{legend.coeff}{double; size of the types label in the legend (default is 1)} +\item{title}{Title of the plot. Default as.description(x)} -\item{label.coeff}{double; size of the events label (default is 1)} +\item{confidence}{Should I add confidence informations? No if NA} -\item{label.color}{color events label (default "black")} +\item{p.min}{p-value cutoff. Default automatic} -\item{label.edge.size}{double; size of the confidence label, when used (default is 12)} +\item{legend}{Should I visualise the legend?} + +\item{legend.cex}{CEX value for legend. Default 1.0} + +\item{edge.cex}{CEX value for edge labels. Default 1.0} + +\item{label.edge.size}{Size of edge labels. Default NA for automatic rescaling} + +\item{expand}{Should I expand hypotheses? Default TRUE} + +\item{genes}{Visualise only genes in this list. Default NULL, visualise all.} + +\item{relations.filter}{Filter relations to dispaly according to this functions. Default NA} + +\item{edge.color}{Edge color. Default 'black'} + +\item{pathways.color}{RColorBrewer colorser for patways. Default 'Set1'.} + +\item{file}{String containing filename for PDF output. If NA no PDF output will be provided} + +\item{legend.pos}{Legend position. Default 'bottom',} + +\item{pathways}{A vector containing pathways information as described in as.patterns()} + +\item{lwd}{Edge base lwd. Default 3} + +\item{annotate.sample}{= List of samples to search for events in model} + +\item{...}{Additional arguments for RGraphviz plot function} +} +\value{ +Information about the reconstructed model } \description{ -\code{tronco.plot} plots a progression model from a recostructed \code{topology}. +Plots a progression model from a recostructed dataset } \examples{ -\dontrun{ -types.load("data/types.txt") -events.load("data/events.txt") -data.load("data/CGH.txt") -topology <- tronco.caprese(data.values) -tronco.plot(topology, legend.pos = "topleft", legend = TRUE, confidence = TRUE, -legend.col = 1, legend.coeff = 0.7, label.edge.size = 10, label.coeff = 0.7) -} +data(test_model) +tronco.plot(test_model) } diff --git a/man/types.Rd b/man/types.Rd deleted file mode 100644 index 4a87ea43..00000000 --- a/man/types.Rd +++ /dev/null @@ -1,11 +0,0 @@ -% Generated by roxygen2 (4.0.2): do not edit by hand -\docType{data} -\name{types} -\alias{types} -\title{Types collection for Ovarian cancer CGH data} -\format{An example with 2 types} -\description{ -This example contains a collection of types associeted to the -Ovarian cancer CGH dataset -} - diff --git a/man/types.add.Rd b/man/types.add.Rd deleted file mode 100644 index a61da01b..00000000 --- a/man/types.add.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2 (4.0.2): do not edit by hand -\name{types.add} -\alias{types.add} -\title{add a new type of event (e.g., missense point mutation)} -\usage{ -types.add(type.name, color.name) -} -\arguments{ -\item{type.name}{The type label. All type labels are strings.} - -\item{color.name}{The type color. All R's color definitions are allowed.} -} -\description{ -\code{types.add} sets a global data frame 'types' that contains all types defined. Types can be added and refined incrementally, in any order. -} -\details{ -\code{types.add} defines a type of event considered at a time. If the type was previously defined, its definition is updated to keep track of its last definition. A consistency check is performed to ensure that the type is valid. Types must be defined before events are loaded. -} -\examples{ -types.add("gain", "red") -} - diff --git a/man/types.load.Rd b/man/types.load.Rd deleted file mode 100644 index 3d0127ca..00000000 --- a/man/types.load.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2 (4.0.2): do not edit by hand -\name{types.load} -\alias{types.load} -\title{load a set of types from file} -\usage{ -types.load(data.input) -} -\arguments{ -\item{data.input}{The input file path or a dataset to be validated.} -} -\description{ -\code{types.load} sets a global data frame 'types' that contains all type definitions found in a specified file or dataset -to be validated. -} -\details{ -\code{types.load} allows to load type definitions from a given file path. The file which contains -all the definitions must be structured as a csv file. All definitions are couple of values -type name and color name as shown below: - -typeName, colorName -... , ... -} -\seealso{ -\code{\link{types.add}} -} - diff --git a/man/verify.parents.Rd b/man/verify.parents.Rd new file mode 100644 index 00000000..556c1672 --- /dev/null +++ b/man/verify.parents.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/caprese.algorithm.R +\name{verify.parents} +\alias{verify.parents} +\title{verify.parents} +\usage{ +verify.parents(best.parents, marginal.probs, joint.probs) +} +\arguments{ +\item{best.parents}{best edges to be verified} + +\item{marginal.probs}{observed marginal probabilities} + +\item{joint.probs}{observed joint probabilities} +} +\value{ +best.parents: list of the best valid parents +} +\description{ +verify the independent progression filter +} + diff --git a/man/verify.probability.raising.do.boot.Rd b/man/verify.probability.raising.do.boot.Rd new file mode 100644 index 00000000..4a7ce2d1 --- /dev/null +++ b/man/verify.probability.raising.do.boot.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.algorithm.R +\name{verify.probability.raising.do.boot} +\alias{verify.probability.raising.do.boot} +\title{verify.probability.raising.do.boot} +\usage{ +verify.probability.raising.do.boot(prima.facie.model.distributions, + prima.facie.null.distributions, pvalue, adj.matrix, edge.confidence.matrix) +} +\arguments{ +\item{prima.facie.model.distributions}{distributions of the prima facie model} + +\item{prima.facie.null.distributions}{distributions of the prima facie null} + +\item{pvalue}{minimum pvalue for the Mann-Whitney U tests to be significant} + +\item{adj.matrix}{adjacency matrix of the topology} + +\item{edge.confidence.matrix}{matrix of the confidence of each edge} +} +\value{ +probability.raising: list describing the causes where probability raising is verified +} +\description{ +verify the probability raising condition +} + diff --git a/man/verify.probability.raising.no.boot.Rd b/man/verify.probability.raising.no.boot.Rd new file mode 100644 index 00000000..69c9bbab --- /dev/null +++ b/man/verify.probability.raising.no.boot.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.algorithm.R +\name{verify.probability.raising.no.boot} +\alias{verify.probability.raising.no.boot} +\title{verify.probability.raising.no.boot} +\usage{ +verify.probability.raising.no.boot(prima.facie.model, prima.facie.null, + adj.matrix, edge.confidence.matrix) +} +\arguments{ +\item{prima.facie.model}{prima facie model} + +\item{prima.facie.null}{prima facie null} + +\item{adj.matrix}{adjacency matrix of the topology} + +\item{edge.confidence.matrix}{matrix of the confidence of each edge} +} +\value{ +probability.raising: adjacency matrix where temporal priority is verified +} +\description{ +verify the probability raising condition without bootstrap +} + diff --git a/man/verify.temporal.priority.do.boot.Rd b/man/verify.temporal.priority.do.boot.Rd new file mode 100644 index 00000000..589f614a --- /dev/null +++ b/man/verify.temporal.priority.do.boot.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.algorithm.R +\name{verify.temporal.priority.do.boot} +\alias{verify.temporal.priority.do.boot} +\title{verify.temporal.priority.do.boot} +\usage{ +verify.temporal.priority.do.boot(marginal.probs.distributions, pvalue, + adj.matrix, edge.confidence.matrix) +} +\arguments{ +\item{marginal.probs.distributions}{distributions of the bootstrapped marginal probabilities} + +\item{pvalue}{minimum pvalue for the Mann-Whitney U tests to be significant} + +\item{adj.matrix}{adjacency matrix of the topology} + +\item{edge.confidence.matrix}{matrix of the confidence of each edge} +} +\value{ +temporal.priority: list describing the causes where temporal priority is verified +} +\description{ +verify the temporal priority condition with bootstrap +} + diff --git a/man/verify.temporal.priority.no.boot.Rd b/man/verify.temporal.priority.no.boot.Rd new file mode 100644 index 00000000..4e863c4d --- /dev/null +++ b/man/verify.temporal.priority.no.boot.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/capri.algorithm.R +\name{verify.temporal.priority.no.boot} +\alias{verify.temporal.priority.no.boot} +\title{verify.temporal.priority.no.boot} +\usage{ +verify.temporal.priority.no.boot(marginal.probs, adj.matrix, + edge.confidence.matrix) +} +\arguments{ +\item{marginal.probs}{marginal probabilities} + +\item{adj.matrix}{adjacency matrix of the topology} + +\item{edge.confidence.matrix}{matrix of the confidence of each edge} +} +\value{ +temporal.priority: adjacency matrix where temporal priority is verified +} +\description{ +verify the temporal priority condition without bootstrap +} + diff --git a/man/which.samples.Rd b/man/which.samples.Rd new file mode 100644 index 00000000..f17983a0 --- /dev/null +++ b/man/which.samples.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/which.functions.R +\name{which.samples} +\alias{which.samples} +\title{which.samples} +\usage{ +which.samples(x, gene, type, neg = FALSE) +} +\arguments{ +\item{x}{A TRONCO compliant dataset.} + +\item{gene}{A list of gene names} + +\item{type}{A list of types} + +\item{neg}{If FALSE return the list, if TRUE return as.samples() - list} +} +\value{ +A list of sample +} +\description{ +Return a list of samples with specified alteration +} +\examples{ +data(test_dataset) +which.samples(test_dataset, 'TET2', 'ins_del') +which.samples(test_dataset, 'TET2', 'ins_del', neg=TRUE) +} + diff --git a/tests/runTests.R b/tests/runTests.R deleted file mode 100644 index 407ec5b9..00000000 --- a/tests/runTests.R +++ /dev/null @@ -1 +0,0 @@ -BiocGenerics:::testPackage("TRONCO") \ No newline at end of file diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 00000000..759c7cda --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(TRONCO) + +test_check("TRONCO") diff --git a/tests/testthat/test_as_function.R b/tests/testthat/test_as_function.R new file mode 100644 index 00000000..61b7fc41 --- /dev/null +++ b/tests/testthat/test_as_function.R @@ -0,0 +1,61 @@ + +data(maf) +muts = import.MAF(maf) +context("AS functions test") + +test_that("as.genotypes returns a genotypes matrix", { + data(as.genotypes.test) + expect_equal(as.genotypes(muts), as.genotypes.test) + expect_equal(as.genotypes(NULL), NULL) +}) + +test_that("as.samples returns a list of samples", { + expect_equal(as.samples(muts), + unique(as.character(maf$Tumor_Sample_Barcode))) + expect_equal(as.samples(NULL), NULL) +}) + +test_that("as.genes returns a list of samples", { + expect_equal(as.genes(muts), unique(as.character(maf$Hugo_Symbol))) + expect_equal(as.genes(NULL), NULL) +}) + +test_that("as.events returns a list of samples", { + data(as.events.test) + expect_equal(as.events(muts), as.events.test) + expect_equal(as.events(NULL), NULL) +}) + +test_that("as.stages returns a list of stages", { + data(as.stages.test) + stages = list() + stages$stage = c('A', 'B', 'C') + names(stages$stage) = as.samples(muts) + stages = as.data.frame(stages) + muts_stages = annotate.stages(muts, stages=stages) + expect_equal(as.stages(muts_stages), as.stages.test) + expect_equal(as.stages(muts), NA) + expect_equal(as.stages(NULL), NA) +}) + +test_that("as.types returns a list of types", { + expect_equal(as.types(muts), 'Mutation') + expect_equal(as.types(NULL), NULL) +}) + +test_that("as.colors returns a list of types", { + areColors <- function(x) { + sapply(x, function(X) { + tryCatch(is.matrix(col2rgb(X)), + error = function(e) FALSE) + }) + } + expect_true(all(areColors(as.colors(muts)))) + expect_equal(as.colors(NULL), NULL) +}) + + + + + + diff --git a/vignettes/events.txt b/vignettes/events.txt deleted file mode 100644 index cabd982a..00000000 --- a/vignettes/events.txt +++ /dev/null @@ -1,7 +0,0 @@ -8q+ , gain , 1 -3q+ , gain , 2 -5q- , loss , 3 -4q- , loss , 4 -8p- , loss , 5 -1q+ , gain , 6 -Xp- , loss , 7 diff --git a/vignettes/vignette.Rnw b/vignettes/vignette.Rnw index caa85302..2fef2acc 100644 --- a/vignettes/vignette.Rnw +++ b/vignettes/vignette.Rnw @@ -1,4 +1,11 @@ \documentclass[a4paper, 9pt]{article} + +<>= +BiocStyle::latex() +@ + +% \VignetteIndexEntry{An R Package for TRanslational ONCOlogy} + \usepackage{hyperref} \usepackage{amsmath, amsthm, amssymb} \usepackage{xfrac} @@ -16,6 +23,8 @@ \usepackage{url} +\usepackage{placeins} + \usepackage{xspace} @@ -23,11 +32,6 @@ \newcommand{\TRONCO}{\textsc{tronco}} \usepackage{fullpage} -% \VignetteIndexEntry{TRONCO} -%\VignetteIndexEntry{TRONCO} -%\VignetteDepends{TRONCO} -%\VignetteKeywords{TRONCO} -%\VignettePackage{TRONCO} \begin{document} @@ -39,6 +43,7 @@ Marco Antoniotti\footnote{Dipartimento di Informatica Sistemistica e Comunicazione, Universit\'a degli Studi Milano-Bicocca Milano, Italy.} \and Giulio Caravagna$^\ast$ \and +Luca De Sano$^\ast$ \and Alex Graudenzi$^\ast$ \and Ilya Korsunsky\footnote{Courant Institute of Mathematical Sciences, New York University, New York, USA.} \and Mattia Longoni$^\ast$ \and @@ -54,31 +59,28 @@ Daniele Ramazzotti$^\ast$ \begin{center} \begin{minipage}[h]{0.75\textwidth} -\textbf{Abstract.} Genotype-level {\em cancer progression models} describe the ordering of accumulating mutations, e.g., somatic mutations / copy number variations, during cancer development. These graphical models help understand the ``causal structure'' involving events promoting cancer progression, possibly predicting complex patterns characterising genomic progression of a cancer. Reconstructed models can be used to better characterise genotype-phenotype relation, and suggest novel targets for therapy design. - -\TRONCO{} ({\sc tr}{\em anslational} {\sc onco}{\em logy}) is a \textsc{r} package aimed at collecting state-of-the-art algorithms to infer -\emph{progression models} from \emph{cross-sectional} data, i.e., data collected from independent patients which does not necessarily incorporate any evident temporal information. These algorithms require a binary input matrix where: $(i)$ each row represents a patient genome, $(ii)$ each column an event relevant to the progression (a priori selected) and a $0/1$ value models the absence/presence of a certain mutation in a certain patient. - - - -The current first version of \TRONCO{} -implements the \CAPRESE{} algorithm ({\sc ca}{\em ncer} {\sc pr}{\em ogression} {\sc e}{\em xtraction} {\em with} {\sc s}{\em ingle} {\sc e}{\em dges}) to infer possible progression models arranged as \emph{trees}; -cfr. -\begin{itemize} -\item \emph{Inferring tree causal models of cancer progression with - probability raising}, L. Olde Loohuis, G. Caravagna, - A. Graudenzi, D. Ramazzotti, G. Mauri, M. Antoniotti and - B. Mishra. {PLoS One}, \emph{to appear}. -\end{itemize} -This vignette shows how to use \TRONCO{} to infer a tree model of -ovarian cancer progression from CGH data of copy number alterations (classified as gains or losses over chromosome's arms). The dataset used is -available in the SKY/M-FISH database. -The reference manual for \TRONCO{} is available in the package. -\begin{center} -\includegraphics[width=0.9\textwidth]{workflow.png} -\end{center} -\flushright -\scriptsize \em The \TRONCO{} workflow. +\textbf{Abstract.} Genotype-level {\em cancer progression models} describe the temporal ordering in which genomic alterations such as somatic mutations and copy number alterations tend to fixate and accumulate during cancer formation and progression. These graphical models can describe trends of \textit{natural selection} across a population of independent tumour samples (cross-sectional data), or reconstruct the clonal evolution in a single patient's tumour (multi-region or single-cell data). In terms of application, such models can be used to better elucidate genotype-phenotype relation, predict cancer hallmarks and outcome of personalised treatment as well as suggest novel targets for therapy design. +\\ + +\TRONCO{} ({\sc tr}{\em anslational} {\sc onco}{\em logy}) is a \textsc{R} package which collects +algorithms to infer progression models from Bernoulli 0/1 profiles of genomic +alterations across a tumor sample. Such profiles are usually visualised as a +binary input matrix where each row represents a patient's sample (e.g., the +result of a sequenced tumor biopsy), and each column an event relevant to the +progression (a certain type of somatic mutation, a focal or higher-level +chromosomal copy number alteration etc.); a 0/1 value models the absence/presence +of that alteration in the sample. In this version of TRONCO such profiles can +be readily imported by boolean matrices and MAF/GISTIC files. The package provides +various functions to editing, visualise and subset such data, as well as functions +to query the Cbio portal for cancer genomics. This version of TRONCO comes with +the parallel implementations the CAPRESE [PLoS ONE 9(12): e115570] and CAPRI +[Bioinformatics, doi:10.1093/bioinformatics/btv296] algorithms to infer possible +progression models arranged as trees, or general direct acyclic graphs. +Bootstrap functions to assess the parametric, non-prametric and statistical +confidence of every inferred model are also provided. The package comes with +some data available as well, which include the dataset of Atypical Chronic Myeloid +Leukemia samples provided by Piazza et al., Nat. Genet., 45 (2013), and examples. + \end{minipage} \end{center} @@ -91,252 +93,203 @@ The reference manual for \TRONCO{} is available in the package. installed to use the package, see \texttt{Bioconductor.org}. -\paragraph{\large 1. Types/Events definition}{\ }\\ +\paragraph{\large Event selection}{\ }\\ -First, load \TRONCO{} in your \textsc{r} console. +First, load \TRONCO{} in your \textsc{R} console and the example \textit{"dataset"}. <<>>= library(TRONCO) +data(aCML) +hide.progress.bar <<- TRUE @ -Every node in the plotted topology can be colored according to the -color table defined in \textsc{r}. You can use the command -\texttt{colors} to see the available colors, e.g., \texttt{"red"}, \texttt{"blue"} or RGB -\texttt{"\#FF9900FF"}. - -You can start defining the \emph{event types} that you are -considering, and assign them a color. - -As an example, for CGH data we define two types of events, \emph{gain} -and \emph{loss}, which we color \emph{red} and \emph{green} to represent -amplifications or deletion of a chromosome arm. For instance, we can -do this as follows: + +\paragraph{ We use \texttt{show} function to get a short summary of the aCML dataset } <<>>= -types.add("gain", "cornflowerblue") -types.add("loss", "brown1") +show(aCML) @ -If many types have to be defined it might be convenient to load all of -them at once. This is possible by using a tabular input file -(in \texttt{csv} format): -\[ -\texttt{type\_name, type\_color} \qquad\qquad \text e.g., \quad \texttt{red, gain} -\] -and issuing the command \texttt{types.load("types.txt")} -- if types -are defined in file \texttt{types.txt}. The output produced by -\TRONCO{} might show warnings due to, e.g., different types assigned -the same color. - -Once types are defined, you can define the set of \emph{events} in -the dataset (which will constitute the progression), give them a \emph{label}, a type and bind them to a -dataset column. Since in general there are much more events than types, it might be convenient to prepare an external file to load via command {\tt events.load("events.txt")}. The format expected for events is similar to the one expected for types, namely as a tabular input file in \texttt{csv} format: -\[ -\texttt{event\_name, event\_type, column\_number} \qquad\qquad \text e.g., \quad \texttt{8p+, gain, 1}\, . -\] -For the ovarian CGH dataset, such a file contains the following rows (we show the first 3 lines) -\begin{verbatim} -8p+, gain, 1 -3p+, gain, 2 -5q-, loss, 3 -...... -\end{verbatim} -which define, as events, gains in arm $p$ of chromosomes $8$ and $3$, losses on arm $q$ of chromosomes $5$, etc. Given the file \emph{events.txt} where are defined the events with the above notation, the events can be loaded from a file as follows. + +\paragraph{ These are all the events it contains } <<>>= - data(events) - events.load(events) +as.events(aCML) @ -Events will constitute the nodes in the progression model. If one is willing to add events in a iterative fashion the command {\tt events.add(event\_name, event\_type, column\_number)} can be used. For instance {\tt events.add("8q+", "gain", 1)}. - -At this point, \TRONCO{} executes some consistency checks to ensure that all the added events are of a declared type, and report the user potential inconsistencies. +\paragraph{ Which account for alterations in the following genes } +<<>>= +as.genes(aCML) +@ +\paragraph{ These are \texttt{SETBP1} alterations across input samples } +<<>>= +as.gene(aCML, genes='SETBP1') +@ +\paragraph{ These are the genes for which we found a literature supporting the patterns +that we include below. References are in the main \textit{CAPRI} paper. } +<<>>= +gene.hypotheses = c('KRAS', 'NRAS', 'IDH1', 'IDH2', 'TET2', 'SF3B1', 'ASXL1') +@ -\paragraph{\large 2. Data loading \& Progression inference}{\ }\\ +\paragraph{ Regardless the distinct types of mutations that we included, we want to +select only genes altered in $5\%$ of the cases. Thus we first transform +data in \textit{"Alteration"} (collapsing all event types for the same gene), and +then we use select only those events } +<<>>= +alterations = events.selection(as.alterations(aCML), filter.freq = .05) +@ -Once events are set, you can load the input dataset, which must be -stored in a text file as a binary matrix (once loaded, you can use {\tt tronco.data.view(your\_data)} to visualise loaded data as a heatmap). +\paragraph{ We visualize the selected genes. This plot has no title since name annotation +is not copied by \texttt{events.selection} } <<>>= -data(ov.cgh) -data.load(ov.cgh) +dummy = oncoprint(alterations) +@ +<>= +capture.output(oncoprint(alterations, file='onco-1.pdf'), file='NUL') +@ +\incfig[ht]{onco-1}{0.9\textwidth}{Oncoprint output}{} + +\FloatBarrier + +\paragraph{\large Adding Hypotheses}{\ }\\ + +\paragraph{ Then to reconstruct the aCML model we select from \texttt{data} which have been +selected in \texttt{alteration} - via \texttt{as.genes(alterations)} or that are +part of the prior in \texttt{gene.hypotheses}. We use \texttt{filter.in.names} to +force selection of all the events involving those genes from \texttt{data} } <<>>= -str(data.values) +hypo = events.selection(aCML, filter.in.names=c(as.genes(alterations), gene.hypotheses)) +hypo = annotate.description(hypo, 'CAPRI - Bionformatics aCML data (selected events)') @ -In this case 87 samples are available and 7 events are considered (in general, the inference problem is well posed if there are more samples than events, which is the case here for ovarian). - -Further consistency checks are performed by \TRONCO{} at data-loading time; these include checking that: -\begin{itemize} -\item All the columns of the dataset are assigned a unique event; -\item There are no identical columns in the dataset. If this is the - case, the columns get merged and the events associated get merged - too (a default type is assigned in this case); -\item There are no columns in the dataset solely constituted by 0s - or 1s. If this is the case, the columns and the events associated - are deleted. -\end{itemize} -\TRONCO{} signals the user that the data presents some inconsistency, if that is the case. Once the input is loaded, \CAPRESE{} can -be executed. - -\begin{figure}[t]\center -{\includegraphics[width=0.5\textwidth]{vignette-007}} -\caption{\textbf{Ovarian cancer CGH tree reconstructed with CAPRESE.} - We show the result of reconstruction with \CAPRESE{}. - These trees are plot as explained in \S $2$ and {$3$}. The - tree is the reconstructed model without confidence information.} -\label{fig:tree} -\end{figure} +\paragraph{ We show selected data and we annotate genes in \texttt{gene.hypotheses} to identify +them. Samples names are also shown } <<>>= -topology <- tronco.caprese(data.values, lambda=0.5) +dummy = oncoprint(hypo, gene.annot = list(priors= gene.hypotheses), sample.id = T) @ -In the above example, \CAPRESE{} is executed with a \emph{shrinkage - coefficient} set to $0.5$ (the default value, if not specified), which -is the optimal value for data containing \emph{false positives} and -\emph{false negatives}. If these were absent, the optimal coefficient -should be set to an arbitrary small value, e.g. $10^{-3}$; in any -case the coefficient must be in $[0,1]$. Notice that \TRONCO{} -provides an \emph{empirical estimation} of the the rate of false -positives and negatives in the data, given the reconstructed model; -this is done via $\ell_2$ distance. - -The returned topology can be printed to screen by using the -\texttt{topology} object print method, or can be visualized by using -the \texttt{tronco.plot} function. -<>= -topology -tronco.plot(topology, title="Ovarian cancer progression with CAPRESE", legend.title="CGH events", - legend.coeff = 1.0, label.coeff = 1.2, legend = TRUE) +<>= +capture.output(oncoprint(hypo, gene.annot = list(priors= gene.hypotheses), sample.id = T, file='onco-2.pdf'), file='NUL') @ +\incfig[ht]{onco-2}{0.9\textwidth}{Oncoprint output}{} -In this case we are assigning a title to the plot, we are requiring -to display a legend (\texttt{ legend = TRUE}), and we are setting custom -size for the text in the legend (\texttt{legend.coeff = 0.7}, $70\%$ -of the default size) and in the model (\texttt{ label.coeff = 1.2}); -see Figure \ref{fig:tree}. +\FloatBarrier -\paragraph{\large 3. Confidence estimation}{\ }\\ +\paragraph{ We now add the hypotheses that are described in CAPRI's manuscript } -\begin{figure}[t]\centerline{ -\fbox{\includegraphics[width=0.33\textwidth]{vignette-008}} -\fbox{\includegraphics[width=0.33\textwidth]{vignette-009}} -\fbox{\includegraphics[width=0.33\textwidth]{vignette-010}} \\ -} +\paragraph{ Add hypotheses of hard exclusivity (XOR) for NRAS/KRAS events (Mutation). The hypothesis is tested +against all other dataset events } +<<>>= +hypo = hypothesis.add(hypo, 'NRAS xor KRAS', XOR('NRAS', 'KRAS')) +@ -\centerline{ -\fbox{\includegraphics[width=0.33\textwidth]{vignette-011}} -\fbox{\includegraphics[width=0.33\textwidth]{vignette-012}} -\fbox{\includegraphics[width=0.33\textwidth]{vignette-013}} -} -\caption{\textbf{Probabilities (input data): visualisation and comparison with model's predictions.} Top: observed - \emph{frequencies} of \emph{observed}, \emph{joint} and - \emph{conditional} distributions of events (conditionals are - restricted according to the reconstructed progression - model) as emerge from the data. Bottom: difference between observed and fitted - probabilities, according to the reconstructed progression.} -\label{fig:distrib} -\end{figure} - -\paragraph{Data and model probabilities.} Before estimating the -confidence of a reconstruction, one might print and visualise the -\emph{frequency of occurrence} for each event, the \emph{ joint - distribution} and the \emph{conditional distribution} according to -the input data (i.e., the \emph{observed} probabilities). Notice -that for the conditional distribution we condition only on the parent -of a node, as reconstructed in the returned model. Plots of these distributions are shown in Figure -\ref{fig:distrib}, and are evaluated as follows. -<>= - confidence.data.single(topology) +\paragraph{ Here we try to include also a soft exclusivity (OR) pattern but, since its \textit{"signature"} +is the same of the hard one, it will not be included. The code below is commented because it gives errors. } +<>= +hypo = hypothesis.add(hypo, 'NRAS or KRAS', OR('NRAS', 'KRAS')) @ -<>= - confidence.data.joint(topology) + +\paragraph{ For the sake to better highlight the perfect (hard) exclusivity between NRAS/KRAS +mutations one can visualize their alterations } +<<>>= +dummy = oncoprint(events.selection(hypo, filter.in.names = c('KRAS', 'NRAS'))) @ -<>= - confidence.data.conditional(topology) +<>= +capture.output(oncoprint(events.selection(hypo, filter.in.names = c('KRAS', 'NRAS')), file='onco-3.pdf'), file='NUL') @ +\incfig[ht]{onco-3}{0.9\textwidth}{Oncoprint output}{} -In a similar way, by using \texttt{ confidence.fit.single(topology)}, -\texttt{ confidence.fit.joint(topology)} or -\texttt{confidence.fit.conditional(topology)}, the analogous -probabilities can be assessed according to the model. This are not -shown in this vignette. +\FloatBarrier -The difference between observed and fit probabilities can be -visualised as follows. -<>= -confidence.single(topology) +\paragraph{ This is as above, but includes other events. Again, we can include only the hard exclusivity pattern } +<<>>= +hypo = hypothesis.add(hypo, 'SF3B1 xor ASXL1', XOR('SF3B1', OR('ASXL1')), '*') +@ +<>= +hypo = hypothesis.add(hypo, 'SF3B1 or ASXL1', OR('SF3B1', OR('ASXL1')), '*') @ -<>= -confidence.joint(topology) + +\paragraph{ We now do the same for TET2 and IDH2. In this case 3 events for TET2 are present, which are +\textit{"Ins/Del"}, \textit{"Missense point"} and \textit{"Nonsense point"}. For this reason, since we are not specifying +a subset of such events all TET2 alterations are used. Since these show a perfect hard exclusivity +trend these will be included in XOR. } +<<>>= +as.events(hypo, genes = 'TET2') +hypo = hypothesis.add(hypo, 'TET2 xor IDH2', XOR('TET2', 'IDH2'), '*') @ -<>= -confidence.conditional(topology) +<>= +hypo = hypothesis.add(hypo, 'TET2 or IDH2', OR('TET2', 'IDH2'), '*') @ +<<>>= +dummy = oncoprint(events.selection(hypo, filter.in.names = c('TET2', 'IDH2'))) +@ +<>= +capture.output(oncoprint(events.selection(hypo, filter.in.names = c('TET2', 'IDH2')), file='onco-4.pdf'), file='NUL') +@ +\incfig[ht]{onco-4}{0.9\textwidth}{Oncoprint output}{} -\paragraph{Bootstrap confidence.}{\ }\\ +\FloatBarrier -Confidence in a model can be estimated via \emph{parametric} and -\emph{non-parametric bootstrap}. In the former case, the model is -assumed to be correct and data is sampled by the model, in the latter -case resamples are taken from the input data, with repetitions. In any -case, the reconstruction confidence is the number of times that the -estimated tree or edge is inferred out of a number of -resamples. The parameters of the bootstrap procedure can be custom -set. +\paragraph{ For every gene that has more than one event associated we also add a soft exclusivity pattern +for its events } +<<>>= +hypo = hypothesis.add.homologous(hypo) +@ +\paragraph{ The dataset input to CAPRI is shown } <<>>= -set.seed(12345) -topology <- tronco.bootstrap(topology, type="non-parametric", nboot=1000) +dummy = oncoprint(hypo, gene.annot = list(priors= gene.hypotheses), sample.id = T) @ -<>= -tronco.bootstrap.show(topology) +<>= +capture.output(oncoprint(hypo, gene.annot = list(priors= gene.hypotheses), sample.id = T, file='onco-5.pdf'), file='NUL') @ +\incfig[ht]{onco-5}{0.9\textwidth}{Oncoprint output}{} + +\FloatBarrier +\paragraph{\large Model reconstruction}{\ }\\ -In this case, for instance, we are performing non-parametric bootstrap -(the default one) with $1000$ repetitions and, since no shrinkage -coefficient is specified, we are still using $0.5$. Here the estimated -error rates are used to include noise levels estimated from the -data/model. To perform parametric bootstrap is enough to use the flag -\texttt{ type="parametric"}. +\paragraph{ We execute CAPRI with its default parameter: we use both AIC/BIC regularizators, Hill-climbing +exhaustive bootstrap (100 replicates for Wilcoxon testing), p-value 0.05 and we set seed } <<>>= -set.seed(12345) -topology <- tronco.bootstrap(topology, type="parametric", nboot=1000) +model = tronco.capri(hypo, boot.seed = 12345, regularization='bic', nboot=6) @ -<>= -tronco.bootstrap.show(topology) + +\paragraph{ We can plot the reconstructed model. We set some parameters to get a fancy plot; confidence +is shown as temporal priority and probability raising (selective advantage scores) and +hypergeometric testing (goodness of input data). } +<>= +tronco.plot(model, + fontsize = 13, + scale.nodes = .6, + confidence = c('tp', 'pr', 'hg'), + height.logic = 0.25, + legend.cex = .5, + pathways = list(priors= gene.hypotheses)) @ +\incfig[ht]{vignette-figplot}{0.9\textwidth}{aCML Reconstructed model} +{Pre bootstrap.} +\FloatBarrier +\paragraph{\large Bootstrapping data}{\ }\\ -Results of bootstrapping are visualized as a table (useful for edge -confidence), and as a heatmap by using command -\texttt{tronco.bootstrap.show}. The overall model confidence is -reported, too. In Figure 3 results of bootstrap are -shown. If one is willing to visualize this confidence in the plot of -the inferred tree an input flag \texttt{confidence} can be used with -function \texttt{tronco.plot}. For instance: -<>= -tronco.plot(topology, title="Ovarian cancer progression with CAPRESE", legend.title="CGH events", - legend.coeff = 1.0, label.coeff = 1.2, legend = TRUE, confidence = TRUE) +<<>>= +model.boot = tronco.bootstrap(model, nboot=6) @ -In this case, the thicker lines reflect the most confident edges; -confidence is also reported as labels of edges, as shown in -Figure 4 -% -% -% These are visualized in Figure \ref{fig:bootstrap}. +<>= +tronco.plot(model.boot, + fontsize = 13, + scale.nodes = .6, + confidence=c('npb'), + height.logic = 0.25, + legend.cex = .5) +@ +\incfig[ht]{vignette-figplotboot}{0.9\textwidth}{aCML Reconstructed model} +{After bootstrap.} + + +\end{document} -\begin{figure}[t]\center -\fbox{\includegraphics[width=0.45\textwidth]{vignette-015}} -\fbox{\includegraphics[width=0.45\textwidth]{vignette-017}} -\caption{\textbf{Bootstrap for edge confidence.} Non-parametric and parametric confidence in each reconstructed edge as assessed via bootstrapping.} -\label{fig:bootstrap} -\end{figure} -\begin{figure}[t]\center -\fbox{\includegraphics[width=0.45\textwidth]{vignette-018}} -\caption{\textbf{Bootstrap information included in the model.} You can include the result of edge confidence estimation via bootstrap by using flag {\tt confidence}. In this case the thickness of each edge is proportional to its estimated confidence.} -\label{fig:bootstrap} -\end{figure} +% buggone -> fix http://stackoverflow.com/questions/12481267/in-r-how-to-prevent-blank-page-in-pdf-when-using-gridbase-to-embed-subplot-insi ?? -\end{document} \ No newline at end of file diff --git a/vignettes/vignette.pdf b/vignettes/vignette.pdf deleted file mode 100644 index b31beb4f..00000000 Binary files a/vignettes/vignette.pdf and /dev/null differ diff --git a/vignettes/workflow.png b/vignettes/workflow.png deleted file mode 100644 index 4b8514c9..00000000 Binary files a/vignettes/workflow.png and /dev/null differ