Skip to content

Commit

Permalink
Remove sequences containing non-ATCG characters; #1713
Browse files (browse the repository at this point in the history)
  • Loading branch information
timoast committed Jun 14, 2024
1 parent 5abe89b commit f229f7c
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions R/footprinting.R
Original file line number Diff line number Diff line change
Expand Up @@ -311,8 +311,8 @@ InsertionBias.ChromatinAssay <- function(
insertions <- Extend(x = insertions, upstream = 3, downstream = 2)
sequences <- as.vector(x = Biostrings::getSeq(x = genome, insertions))
seq.freq <- table(sequences)
# remove sequences containing N
keep.seq <- !grepl(pattern = "N", x = names(x = seq.freq))
# remove sequences containing non-ATCG characters
keep.seq <- !grepl(pattern = "[^ATCG]", x = names(x = seq.freq))
insertion_hex_freq <- as.matrix(x = seq.freq[keep.seq])
genome_freq <- Biostrings::oligonucleotideFrequency(
x = Biostrings::getSeq(x = genome, chr.use),
Expand Down Expand Up @@ -411,6 +411,10 @@ FindExpectedInsertions <- function(dna.sequence, bias, verbose = TRUE) {
# append
x[current.pos:end.pos] <- as.numeric(x = frequencies)
j[current.pos:end.pos] <- jj

# remove frequencies not present in hex.key
frequencies <- frequencies[names(x = frequencies) %in% names(x = hex.key)]

i[current.pos:end.pos] <- as.vector(x = hex.key[names(x = frequencies)])
# shift current position
current.pos <- end.pos + 1
Expand Down

0 comments on commit f229f7c

Please sign in to comment.