Add last changes to get accessibility from bed, and testing

raphaelmourad · Jun 1, 2021 · 789391c · 789391c
1 parent 0d410eb
commit 789391c
Show file tree

Hide file tree

Showing 12 changed files with 1,435 additions and 1,960 deletions.
diff --git a/R/DeepG4.R b/R/DeepG4.R
@@ -79,9 +79,10 @@ DeepG4 <- function(X = NULL,X.atac = NULL,Y=NULL,model=NULL,lower.case=F,treshol
             stop(paste0("X.atac must be a numerical vector."),
                  call. = FALSE)
         }
-        default_model <- ifelse(is.null(X.atac),system.file("extdata", "DeepG4_classic_rescale_BW_sampling_02_03_2021/2021-03-02T16-17-28Z/best_model.h5", package = "DeepG4"),
-                                system.file("extdata", "DeepG4_ATAC_rescale_BW_sampling_02_03_2021/2021-03-02T16-01-34Z/best_model.h5", package = "DeepG4"))
     }
+    default_model <- ifelse(is.null(X.atac),system.file("extdata", "DeepG4_classic_rescale_BW_sampling_02_03_2021/2021-03-02T16-17-28Z/best_model.h5", package = "DeepG4"),
+                            system.file("extdata", "DeepG4_ATAC_rescale_BW_sampling_02_03_2021/2021-03-02T16-01-34Z/best_model.h5", package = "DeepG4"))
+    log_odds_index <- ifelse(is.null(X.atac),7,9)
     ## Check sequences sizes
     message("Check sequences sizes...")
     seqsizes <- Biostrings::nchar(X)
@@ -171,7 +172,7 @@ DeepG4 <- function(X = NULL,X.atac = NULL,Y=NULL,model=NULL,lower.case=F,treshol
                               validation_split = 0.2,
                               verbose= 0)
         message("Done !...")
-        res <- stats::predict(model,X)
+        res <- stats::predict(model,X_inputs)
         if(retrain.path == ""){
             retrain.path <- paste0("DeepG4_retrained_",Sys.Date(),".hdf5")
         }
@@ -197,7 +198,7 @@ DeepG4 <- function(X = NULL,X.atac = NULL,Y=NULL,model=NULL,lower.case=F,treshol
         if(log_odds){
             # If log_odds is set to TRUE, return instead a real number computed by the layer before the sigmoid activation (or the last layer without sigmoid)
             model <- keras::keras_model(inputs = model$input,
-                                        outputs = keras::get_layer(model, index = 7)$output)
+                                        outputs = keras::get_layer(model, index = log_odds_index)$output)
         }
         res <- stats::predict(model,X_inputs)
     }

diff --git a/R/DeepG4InputFromBED.R b/R/DeepG4InputFromBED.R
@@ -1,15 +1,4 @@
-#' Title
-#'
-#' @param BED
-#' @param ATAC
-#' @param is.bw
-#' @param GENOME
-#'
-#' @return
-#' @export
-#'
-#' @examples
-DeepG4InputFromBED <- function(BED = NULL,ATAC = NULL,is.bw = TRUE,GENOME = NULL){
+DeepG4InputFromBED <- function(BED = NULL,ATAC = NULL,is.bw = TRUE,GENOME = NULL,use.bg = TRUE,windows_bg=5000,treshold_bg = 2){
     if (is.null(GENOME)) {
         stop("GENOME must be provided (see ?DeePG4InputFromBED for accepted formats).",
              call. = FALSE)
@@ -94,9 +83,24 @@ DeepG4InputFromBED <- function(BED = NULL,ATAC = NULL,is.bw = TRUE,GENOME = NULL
              call. = FALSE)
     }
     #Normalize ATAC-seq using the previously computed bins
+    BED$order <- 1:length(BED)
+    X <- Biostrings::getSeq(GENOME,BED)
+    BED <- BiocGenerics::sort(GenomeInfoDb::sortSeqlevels(BED))
+
     binbed <- rtracklayer::import.bed(system.file("extdata", "random_region_for_scaling_min_max.bed", package = "DeepG4"))
     ATAC <- NormBW(ATAC,binbed)
     X.ATAC <- getScoreBW(ATAC,BED)
-    X <- Biostrings::getSeq(GENOME,BED)
+    X.ATAC[is.na(X.ATAC)] <- 0
+
+    if(use.bg){
+        BED.bg <- resize(BED,windows_bg,fix="center")
+        X.ATAC.bg <- getScoreBW(ATAC,BED.bg)
+        X.ATAC.bg[is.na(X.ATAC.bg)] <- 0
+        my_test <- (X.ATAC/X.ATAC.bg)<treshold_bg
+        X.ATAC[my_test] <- 0
+    }
+
+    X.ATAC <- X.ATAC[order(BED$order)]
+
     return(list(X,as.vector(X.ATAC)))
 }
diff --git a/R/ExtractMotifFromModel.R b/R/ExtractMotifFromModel.R
@@ -10,10 +10,10 @@
 #'  built the corresponding position count matrix.
 #' @return A list of Position Count Matrix.
 #' @export
-ExtractMotifFromModel <- function(X = NULL,Y=NULL,lower.case=F,top_kernel = 20){
+ExtractMotifFromModel <- function(X = NULL,Y=NULL,lower.case=F,top_kernel = 20,model.atac = T){
     seq.size <- 201
     kernel_size <- 20
-    tabv = c("N"=5,"T"=4,"G"=3,"C"=2,"A"=1)
+    tabv = c("T"=4,"G"=3,"C"=2,"A"=1)
     #Check if X is provided
     if (is.null(X)) {
         stop("X must be provided (see ?DeepG4 for accepted formats).",
@@ -82,12 +82,21 @@ ExtractMotifFromModel <- function(X = NULL,Y=NULL,lower.case=F,top_kernel = 20){
         })
         X_oh <- array(unlist(X_by_size), dim = c(length(X),seq.size,length(tabv)))
     }
-    model <-  system.file("extdata", "model.hdf5", package = "DeepG4")
-    #Load model with keras (tensorflow must be installed as well)
-    model <- keras::load_model_hdf5(model)
-    weights <- keras::get_weights(object = model)[[1]]
-    Convolution <- keras::keras_model(inputs = model$input,
-                               outputs = keras::get_layer(model, index = 2)$output)
+    if(model.atac){
+        # IF TRUE, we use the model with accessibility and input will be differents
+        model <- system.file("extdata", "DeepG4_ATAC_rescale_BW_sampling_02_03_2021/2021-03-02T16-01-34Z/best_model.h5", package = "DeepG4")
+        #Load model with keras (tensorflow must be installed as well)
+        model <- keras::load_model_hdf5(model)
+        Convolution <- keras::keras_model(inputs = model$input[[1]],
+                                          outputs = keras::get_layer(model, index = 2)$output)
+    }else{
+        model <- system.file("extdata", "DeepG4_classic_rescale_BW_sampling_02_03_2021/2021-03-02T16-17-28Z/best_model.h5", package = "DeepG4")
+        #Load model with keras (tensorflow must be installed as well)
+        model <- keras::load_model_hdf5(model)
+        Convolution <- keras::keras_model(inputs = model$input,
+                                          outputs = keras::get_layer(model, index = 2)$output)
+    }
+
     res <- stats::predict(Convolution,X_oh)
     kernels_information <- colSums(apply(res,c(1,3),max))
     nb_of_kernels <- length(kernels_information)

diff --git a/R/getScoreBW.R b/R/getScoreBW.R
@@ -9,11 +9,10 @@
 #' @examples
 getScoreBW <- function (WIG, BED)
 {
-    res <- lapply(split(BED, droplevels(GenomeInfoDb::seqnames(BED))), function(zz) {
+    res <- do.call("rbind",lapply(split(BED, droplevels(GenomeInfoDb::seqnames(BED))), function(zz) {
         cov <- WIG[[unique(as.character(GenomeInfoDb::seqnames(zz)))]]
         score <- IRanges::Views(cov, start = BiocGenerics::start(zz), end = BiocGenerics::end(zz))
-        return(IRanges::viewMeans(score))
-    })
-    res <- do.call("c",res)
-    return(res)
+        return(as.matrix(score))
+    }))
+    return(rowMeans(res))
 }
diff --git a/README.Rmd b/README.Rmd
@@ -55,6 +55,7 @@ devtools::install_github("morphos30/DeepG4")
 
 ## Basic usage of DeepG4
 
+### With accessibility 
 Given small regions (bed) and an accessibility file (coverage file from ATAC-seq/DNAse-seq/MNase-seq), you can predict active G4 regions in a __specific cell type__:
 
 ```r
@@ -77,32 +78,31 @@ Input_DeepG4
 [[1]]
   A DNAStringSet instance of length 100
       width seq
-  [1]   201 CTGCTGGAGCCCGCCTTACTGTGGGGTGGGGGGGGTACTGCCCTAAGAACTCCAC...CCCAGCATAGACAACCGTGAAAGCCAGAAGAGCTGGCAGAGTCTAGAAGTTGGC
-  [2]   201 CCCCTTGCCAGCCTACCTGGCTCAGGCCCGCCGCGCCCGCAGCCCCAGCGCGGTC...AGAGGACGCAGGCGAGAGGAACTCGGCGGCGCGGCGCCCGCGGCCTATTGGCTG
-  [3]   201 CACACACACTCTTATCAGGCTGGGGCAGGCACTGGCACTGCTGAGTCACCCACAG...TGACTGCTGGGGTTTTCCTCTCCCTAGCCCTTTGATTGAGTCAGGGGTGGGGAT
-  [4]   201 TCCGGCCCCGACCCCGCCCCTCCACGCCCCCAAGGAAACATGTCACTCGGGTTAC...CCACCCCGCGAGAGGCGCCGCCGGCGCCAGGTCCCAGTAGCGGGTGGGTCCTTG
-  [5]   201 GTCCCCACCCCCACCCCACTCAGGGCCAGTGCCTCCCCCTCTCCCGTCAGCTCAA...CACAGATTTTTCACTTCCTGTGCAGTCAGAAAAAGAGGCTCGAGGCTCCCGCTC
+  [1]   201 GTTCGGGCCTCGGTCGCGCCGCCGGGTCTTGCAGACGCGAATGTAAACAGAAACA...TGACTCCTGGAGCGACCTTCACGAGGGAAAGCGCGCCCCCCGGCACCCACCCCT
+  [2]   201 TTTCTATAGTTTTCTTTTGTTTCTACCTCATGACTAGATGATTCACTGCTTGAAC...GTCAAATCTGTCCATCTTCACTGCCACCCTTCAGTACCAAATGACCAGTCTCTT
+  [3]   201 GCTTAAAAGCCTGTAAGAAAGATATAATTTGATAGAACTGGCTAGGATTTGTCAG...CGTCAGGGAGGGGGTGGGGCCTCCACGTGGGAGATCTTGCCTGGAGGTGGTGGA
+  [4]   201 TCCCACACCCGGTAGATGTAAGGGAAAAACTGCATTACCCAGAAGGCACTGCCCC...GTGTGACGTCATCTCCGTGGGCCGGTTTGGCCCTGAAACAGTGTGGGGCCTAGA
+  [5]   201 AGTAGCTACAGAGTTCCTGCTCCAGCAACCAGGAGCCTTGAGGCAGCACAAGGAC...ACCACAATGTCTGCCAAGAAAGAGGATGAGTCACCAAGACCCACAGGAAAGAGG
   ...   ... ...
- [96]   201 CAACCTGCACAGGCCCAGCAGGGCCCCTCCAGGTCTAGGTGAGCAGAGCTCCACC...GCGTGTGGGGGCGGGATACCGACGCGGCTCGGCTGCCGATTGGTCAGAAGAGGA
- [97]   201 ATAACGCGTTGGCCCTTAAGAAAGATGGCATCTTTCCGCCTTCTCTGCCCCCTTC...GCCTGCGCCCGCGACGGAGGCGCGCTTCAAAGCGCAGGCGCGGGGAGGGGGTGG
- [98]   201 TCCCTCTCCTCTTCCACGCCCCCTTCCCACTCCTCCCCCTCCTATCCTCTCCTGG...GGGGCGCCGGGCGGCCGGCGCGCTTGGCGGCAGCCGTGGGAGGCAGGCCGGCAG
- [99]   201 GCGTATCCAGTCCCGCAGCTGACCAATCGGAGCTCGCCCTTCCGGGGCCCGCCCC...CTTCGGATCGCCGAGTAACGCTCACCAGACGTCCCGGCCCTGCCCCTCACCTGA
-[100]   201 TTTAATCTGACTCATCTCCTTTGTAAACAGTAAGGTTATTGAGGGTGAAGATTAG...AAGGTGCCGCGTTTATAAAACTCACCCAAGGTTGGCCGGACGCAGTGGCTTACG
+ [96]   201 CACATGCCTTCCTTGGGGACGTGTTCACACATGTGGCCCTAGCTGTGAGAGACAG...CATCTCAGAACAGCTGAGCTGGAAGTGGGTGAATAATAATAATAATAATAATAA
+ [97]   201 TGGTGGTCTTTCTCTACCGGGCCTGGTAGCCAAAGACAAAGGTCATAATCACTTG...CTATGTACTCTTCAAAGTGCCACCTCCTGGCTGCAAGCCAACCAACACAAAACC
+ [98]   201 TGACCGTAGACCTCGTGCACTTCTGCTGCGGTCGGGGCCGGAGTCTGGGCTGGAG...GCGATCCAGAGCCAAGCGCCCCGCCCCTGCCCGGGCGCGCTCCCTCCTTAGCCC
+ [99]   201 TTAACGTCATCAGTCGGGAGGACGACAGCTACGCACGCGCGGGGCACCTCCTCTG...GCCACGGTGGAGGCAGCGGCGAGAGGGGGCGGGGACAAGGAGAGGGCACGCACG
+[100]   201 GTGTCCGGGTGAGAGACCTGGAGGTGGGGCCTAGGTGTCTACCCGGCCAGGTGCG...TAAGGCTCGGGGCCAGTCGTCGTCCATTCCTTCCTAACACCTCCCTATCCTCCC
 
 [[2]]
-  [1] 0.007955412 0.071087932 0.035285844 0.039676268 0.044947411 0.046581121 0.023230827 0.012387618 0.082391016
- [10] 0.063834818 0.105390005 0.070236574 0.045107473 0.014299864 0.050958029 0.067799521 0.037895024 0.070339336
- [19] 0.106714671 0.028458963 0.043110214 0.019460409 0.045468318 0.051063822 0.072862518 0.021933521 0.040137615
- [28] 0.031299792 0.047589241 0.043257913 0.049913830 0.045461665 0.059799129 0.052672064 0.035330758 0.018697152
- [37] 0.048048701 0.028188545 0.041206733 0.064739781 0.044424973 0.038454479 0.049460457 0.013845773 0.054174495
- [46] 0.073740380 0.025285314 0.019293648 0.060486579 0.048533711 0.069902835 0.082091662 0.047145092 0.062299390
- [55] 0.050818736 0.050709304 0.052714895 0.082551809 0.059438781 0.015752492 0.033484694 0.016623619 0.036711227
- [64] 0.066732034 0.070644726 0.028875399 0.028216949 0.062955818 0.078560801 0.036991223 0.024028454 0.090778897
- [73] 0.063415825 0.060858383 0.074535469 0.031581759 0.029659617 0.016819003 0.046221665 0.048693290 0.014504552
- [82] 0.028334125 0.061296958 0.038678159 0.070923668 0.026397743 0.046453539 0.035868461 0.084673908 0.050543118
- [91] 0.042999488 0.034977892 0.040536342 0.039773488 0.077641724 0.055479821 0.053179943 0.039231167 0.068145290
-[100] 0.013834445
-
+  [1] 0.000000000 0.016287416 0.033261447 0.069375103 0.018520650 0.010934717 0.036308476 0.315843234 0.037658374
+ [10] 0.045887551 0.037320211 0.042853401 0.068908093 0.071774485 0.084947561 0.027456211 0.033915868 0.006912598
+ [19] 0.012604675 0.051405275 0.093813195 0.019288668 0.051228826 0.019520666 0.048686840 0.050116329 0.045801884
+ [28] 0.033079207 0.035834917 0.056326946 0.096531489 0.064706374 0.026422647 0.016979087 0.008512502 0.021891554
+ [37] 0.016688682 0.109472225 0.047901838 0.066676075 0.052591085 0.017467983 0.035541899 0.060001992 0.028878783
+ [46] 0.056284886 0.045126048 0.052469122 0.101620595 0.047741155 0.036925371 0.021645371 0.044472962 0.012457179
+ [55] 0.020373459 0.109529076 0.039006694 0.047824384 0.028752257 0.015437852 0.069926660 0.022213134 0.019726120
+ [64] 0.044609840 0.028773493 0.008077349 0.042587371 0.016502886 0.035757895 0.015023933 0.024181422 0.057516040
+ [73] 0.027492004 0.030316917 0.049878433 0.020105394 0.025934350 0.023845766 0.032338052 0.048007935 0.136436151
+ [82] 0.060423998 0.034617445 0.051958662 0.064664156 0.034518694 0.020277026 0.042060108 0.055335700 0.051632313
+ [91] 0.066588875 0.030586623 0.043823259 0.034947155 0.082091662 0.008496193 0.034567766 0.055516400 0.062191534
+[100] 0.049011882
 ```
 
 Then predict using both __DNA__ and __Accessibility__ :
@@ -114,92 +114,66 @@ head(predictions)
 ```
 ```
           [,1]
-[1,] 0.9280036
-[2,] 1.0000000
-[3,] 0.9964566
-[4,] 0.9999996
-[5,] 0.9999791
-[6,] 0.9999921
+[1,] 0.2159184
+[2,] 0.8819393
+[3,] 0.9991976
+[4,] 0.9999995
+[5,] 0.9740031
+[6,] 0.2259631
 ```
 
-## Advanced usage of DeepG4
+### Without accessibility
 
-If you have a large sequence (>201bp up to several Mbp), you can scan the sequence  and predict the positions of active G4s within the sequence.
+You still can predict active G4 regions using only __DNA__ sequences :
 
-``` r
-library(Biostrings)
+```r
+library(rtracklayer)
 library(DeepG4)
-sequences <- readDNAStringSet(system.file("extdata", "promoters_seq_example.fa", package = "DeepG4"))
-res <- DeepG4Scan(X = sequences,k=20,treshold=0.5)
-```
-DeepG4Scan function scans each input sequence with a step of  `k=20` and outputs for each input sequence the G4 positions (+/- 100bp) and the corresponding DeepG4 probabilities (>= treshold).
-
-``` r
-library(dplyr)
-res %>% dplyr::select(-seq) %>% group_by(seqnames) %>% dplyr::slice(1:2) %>%  head
-```
-
-```
-# A tibble: 6 x 5
-# Groups:   seqnames [3]
-  seqnames start   end width score
-     <int> <int> <int> <int> <dbl>
-1        1  1241  1441   201 0.670
-2        1  1261  1461   201 0.659
-3        2  1481  1681   201 0.648
-4        2  1521  1721   201 0.517
-5        3  2161  2361   201 0.723
-6        3  2181  2381   201 0.998
-```
-
-## SNP effect on g-quadruplex using DeepG4
 
-
-Using our model, you can predict the potential effect of a SNP on active G4 formation :
-
-```r
-# Function to obtain ref/alt DNA sequences from the SNP coordinates
-GetSeqFromSNPs <- function(my_granges,wsize = 201){
-    SNP_pos <- (wsize - 1)/2 + 1 
-    ## Compute Fasta
-    SNps.seq.ref <- my_granges %>% anchor_center() %>% mutate(width = wsize) %>% getSeq(BSgenome.Hsapiens.UCSC.hg19.masked,.)
-    ## Replace ref by alt
-    sampleMat <- matrix(FALSE,nrow = length(SNps.seq.ref),ncol = nchar(SNps.seq.ref[1]))
-    sampleMat[,SNP_pos] <- TRUE
-    SNps.seq.alt <- replaceLetterAt(SNps.seq.ref, sampleMat, my_granges$alt)
-    return(c(SNps.seq.ref,SNps.seq.alt))
-}
-# Libraries
-require(GenomicRanges)
-require(Biostrings)
-require(dplyr)
-require(plyranges)
-require(BSgenome.Hsapiens.UCSC.hg19.masked)
-# Make a GRanges object from two known SNPs
-## Genomic positions
-SNPs <- GRanges(c("chr16:87350773","chr19:50093572"))
-## Name and ref/alt alleles
-SNPs$name <- c("rs3748393","rs7249925")
-SNPs$ref <- c("C","A")
-SNPs$alt <- c("A","G")
-
-## Apply our function to get the ref/alt sequence
-SNPs_seq <- SNPs %>% GetSeqFromSNPs
-## And launch DeepG4 on theses sequences
-DeepG4.score <- DeepG4(SNPs_seq,log_odds=T)
-SNPs$DeepG4_ref <- DeepG4.score[1:length(SNPs),]
-SNPs$DeepG4_alt <- DeepG4.score[(length(SNPs)+1):nrow(DeepG4.score),]
-SNPs <- SNPs %>% mutate(DeltaScore = DeepG4_alt-DeepG4_ref)
-SNPs %>% as_tibble()
+sequences <- readDNAStringSet(system.file("extdata", "test_G4_data.fa", package = "DeepG4"))
+predictions <- DeepG4(X=sequences)
+head(predictions)
 ```
 
 ```
-# A tibble: 2 x 11
-  seqnames    start     end width strand name    ref   alt   DeepG4_ref DeepG4_alt DeltaScore
-  <fct>       <int>   <int> <int> <fct>  <chr>   <chr> <chr>      <dbl>      <dbl>      <dbl>
-1 chr16    87350773  8.74e7     1 *      rs3748… C     A           1.66     -0.462      -2.12
-2 chr19    50093572  5.01e7     1 *      rs7249… A     G          -1.93      0.584       2.51
-```
+          [,1]
+[1,] 0.9478214
+[2,] 0.5868858
+[3,] 0.9660227
+[4,] 0.9093548
+[5,] 0.9119551
+[6,] 0.2471965
+```
+
+<!-- ## Advanced usage of DeepG4 -->
+
+<!-- If you have a large sequence (>201bp up to several Mbp), you can scan the sequence  and predict the positions of active G4s within the sequence. -->
+
+<!-- ``` r -->
+<!-- library(Biostrings) -->
+<!-- library(DeepG4) -->
+<!-- sequences <- readDNAStringSet(system.file("extdata", "promoters_seq_example.fa", package = "DeepG4")) -->
+<!-- res <- DeepG4Scan(X = sequences,k=20,treshold=0.5) -->
+<!-- ``` -->
+<!-- DeepG4Scan function scans each input sequence with a step of  `k=20` and outputs for each input sequence the G4 positions (+/- 100bp) and the corresponding DeepG4 probabilities (>= treshold). -->
+
+<!-- ``` r -->
+<!-- library(dplyr) -->
+<!-- res %>% dplyr::select(-seq) %>% group_by(seqnames) %>% dplyr::slice(1:2) %>%  head -->
+<!-- ``` -->
+
+<!-- ``` -->
+<!-- # A tibble: 6 x 5 -->
+<!-- # Groups:   seqnames [3] -->
+<!--   seqnames start   end width score -->
+<!--      <int> <int> <int> <int> <dbl> -->
+<!-- 1        1  1241  1441   201 0.670 -->
+<!-- 2        1  1261  1461   201 0.659 -->
+<!-- 3        2  1481  1681   201 0.648 -->
+<!-- 4        2  1521  1721   201 0.517 -->
+<!-- 5        3  2161  2361   201 0.723 -->
+<!-- 6        3  2181  2381   201 0.998 -->
+<!-- ``` -->
 
 
 ## Scan DeepG4 DNA motifs from the input sequences
@@ -230,18 +204,24 @@ If you want to use our model architecture, but retrain with your own dataset, yo
 library(Biostrings)
 library(DeepG4)
 library(rsample)
+library(BSgenome.Hsapiens.UCSC.hg19)
+
 
+ATAC <- system.file("extdata", "Peaks_BG4_G4seq_HaCaT_GSE76688_hg19_201b_Accessibility.bw", package = "DeepG4")
+ATAC <- import.bw(ATAC)
 # Read positive and negative set of sequences
-sequences.pos <- readDNAStringSet(system.file("extdata", "Peaks_BG4_G4seq_HaCaT_GSE76688_hg19_201b.Fa", package = "DeepG4"))
-sequences.ctrl <- readDNAStringSet(system.file("extdata", "Peaks_BG4_G4seq_HaCaT_GSE76688_hg19_201b_Ctrl_gkmSVM.Fa", package = "DeepG4"))
-sequences <- c(sequences.pos,sequences.ctrl)
+bed.pos <- import.bed(system.file("extdata", "Peaks_BG4_G4seq_HaCaT_GSE76688_hg19_201b.bed", package = "DeepG4"))
+bed.neg <- import.bed(system.file("extdata", "Peaks_BG4_G4seq_HaCaT_GSE76688_hg19_201b_Ctrl_gkmSVM.bed", package = "DeepG4"))
+
 # Generate classes
-Y <- c(rep(1,length(sequences.pos)),rep(0,length(sequences.ctrl)))
+Y <- c(rep(1,length(bed.pos)),rep(0,length(bed.neg)))
+BED <- c(bed.pos,bed.neg)
+Input_DeepG4 <- DeepG4InputFromBED(BED=BED,ATAC = ATAC,GENOME=BSgenome.Hsapiens.UCSC.hg19)
 
 ```
 
 ```r
-training <- DeepG4(sequences,Y,retrain=TRUE,retrain.path = "DeepG4_retrained.hdf5")
+training <- DeepG4(X=Input_DeepG4[[1]],X.atac=Input_DeepG4[[2]],Y,retrain=TRUE,retrain.path = "DeepG4_retrained.hdf5")
 ```
 
 You can now take a look on the results :
@@ -262,9 +242,9 @@ training[[4]]
 # A tibble: 4 x 3
   .metric     .estimator .estimate
   <chr>       <chr>          <dbl>
-1 accuracy    binary         0.976
-2 kap         binary         0.952
-3 mn_log_loss binary        11.5  
-4 roc_auc     binary         0.997
+1 accuracy    binary        0.989 
+2 kap         binary        0.978 
+3 mn_log_loss binary        0.0429
+4 roc_auc     binary        0.999 
 ```