Skip to content

Commit

Permalink
Merge branch 'data_exploration' of github.com:yoann-dufresne/rHashGen…
Browse files Browse the repository at this point in the history
… into data_exploration
  • Loading branch information
yoann-dufresne committed Oct 9, 2024
2 parents dda7e11 + c6ba95f commit 2fa6af7
Show file tree
Hide file tree
Showing 8 changed files with 308 additions and 2 deletions.
125 changes: 125 additions & 0 deletions .Rhistory
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# Load Rcpp so the C++ hash functions can be compiled and called from R.
library(Rcpp)
# NOTE(review): hard-coded, machine-specific working directory.
setwd("~/git/rHashGen/data_exploration")
# Load the C++ file (replace the path with that of your .cpp file)
sourceCpp("hash_functions_rcpp.cpp")
# Example usage of the functions: hash a value, then apply the inverse hash.
val <- 12345
hashed_value <- hash_R(val)
inverted_value <- inverted_hash_R(hashed_value)
# Printed labels are in French ("hashed value" / "inverted value").
cat("Valeur hashée :", hashed_value, "\n")
cat("Valeur inversée :", inverted_value, "\n")
# Scratch: split a nucleotide string into single characters.
x <- "ATTAC"
input <- "ATTACA"
inpulist <- strsplit(input, "")[[1]]
inpulist
?switch
i = 1
# Translate a nucleotide string into a vector of numeric codes
# (A = 0, T = 1, G = 2, C = 3), one code per character.
#
# input: a single character string such as "ATTAC".
# Returns a numeric vector with one element per character of `input`.
transfo <- function(input) {
  # The split result and the vector read in the loop must be the same object;
  # the earlier mismatch in names made the loop fail with "object not found".
  inputlist <- strsplit(input, "")[[1]]
  out <- rep(NA, length(inputlist))
  for (i in seq_along(inputlist)) {
    charac.i <- inputlist[[i]]
    out[i] <- switch(charac.i, A = 0, T = 1, G = 2, C = 3)
  }
  return(out)
}
# Try the function on the sample sequence stored in x.
transfo(x)
# Map each character of a nucleotide string to its numeric code
# (A = 0, T = 1, G = 2, C = 3).
#
# input: a single character string.
# Returns a vector holding one code per character, in order.
transfo <- function(input) {
  chars <- strsplit(input, "")[[1]]
  codes <- rep(NA, length(chars))
  for (pos in seq_along(chars)) {
    codes[pos] <- switch(chars[[pos]], A = 0, T = 1, G = 2, C = 3)
  }
  codes
}
# Re-run the corrected function on the sample sequence.
transfo(x)
# Scratch arithmetic: 31 divided by a few small primes.
31/7
31/11
31/13
31/2
31/3
31/5
31/7
# Numeric code of one nucleotide: A = 0, T = 1, G = 2, C = 3.
# Any other string matches no branch, so switch() yields NULL.
encode_nucl <- function(nucl) {
  switch(nucl,
    A = 0,
    T = 1,
    G = 2,
    C = 3
  )
}
# Encode a k-mer as a base-4 number, two bits per nucleotide.
#
# seq_k: either one string ("GATTACA") or a character vector of single
#        nucleotides (c("G", "A", ...)); both forms are accepted.
# Returns the numeric encoding, most significant nucleotide first.
encode_kmer <- function(seq_k) {
  # Iterating directly over a string visits the whole string once, not its
  # characters, so the sequence has to be split first. unlist() keeps
  # already-split input working unchanged.
  nucleotides <- unlist(strsplit(seq_k, ""), use.names = FALSE)
  encoded_kmer <- 0
  for (nucl in nucleotides) {
    # Shift the accumulated value by one base-4 digit, then append the code.
    encoded_kmer <- encoded_kmer * 4 + encode_nucl(nucl)
  }
  encoded_kmer
}
# Try the encoder on a 7-mer given as a single string.
encode_kmer("GATTACA")
# Encode a k-mer string as a base-4 number (two bits per nucleotide).
#
# seq_k: character string; only its first element is split and encoded.
# Returns the numeric encoding, most significant nucleotide first.
encode_kmer <- function(seq_k) {
  nucleotides <- strsplit(seq_k, "")[[1]]
  code <- 0
  for (idx in seq_along(nucleotides)) {
    # Shift by one base-4 digit, then append the next nucleotide code.
    code <- code * 4 + encode_nucl(nucleotides[[idx]])
  }
  code
}
# Numeric code of one nucleotide: A = 0, T = 1, G = 2, C = 3.
# Any other string matches no branch, so switch() yields NULL.
encode_nucl <- function(nucl) {
  switch(nucl,
    A = 0,
    T = 1,
    G = 2,
    C = 3
  )
}
# Encode a k-mer string as a base-4 number (two bits per nucleotide).
#
# seq_k: character string; only its first element is split and encoded.
# Returns the numeric encoding, most significant nucleotide first.
encode_kmer <- function(seq_k) {
  nucleotides <- strsplit(seq_k, "")[[1]]
  # Fold left to right starting from 0: multiply the running value by 4 and
  # add the code of the next nucleotide.
  Reduce(
    function(acc, nucl) acc * 4 + encode_nucl(nucl),
    nucleotides,
    0
  )
}
# Re-run the encoder after redefining encode_nucl() and encode_kmer().
encode_kmer("GATTACA")
# `&` is the logical AND in R, so this is not a bitwise operation.
3 & 4
# Search the help for bitwise operators.
??bitwise
# NOTE(review): bitAnd() is called unqualified here and then through bitops::
# on the next line — presumably the unqualified call failed.
bitAnd(3, 2)
bitops::bitAnd(3, 2)
# Slide an encoded k-mer one nucleotide to the right.
#
# prev_kmer: numeric encoding of the current k-mer (as built by encode_kmer).
# new_nucl:  the nucleotide entering the window, as a one-character string.
# k:         length of the k-mer.
# Returns the numeric encoding of the next k-mer.
encode_next_kmer <- function(prev_kmer, new_nucl, k) {
  # Keep the last k - 1 nucleotides. Modulo is used rather than bitwAnd()
  # because bitwAnd() works on 32-bit integers and cannot hold encodings of
  # 2^31 or more (reachable from k = 16); `%%` works directly on doubles.
  suffix <- prev_kmer %% 4^(k - 1)
  # Shift by one base-4 digit and append the incoming nucleotide.
  suffix * 4 + encode_nucl(new_nucl)
}
# Keep the encoding of "GATTACA" to feed the sliding-window encoder.
enco_gattaca <- encode_kmer(seq_k = "GATTACA")
# Slide an encoded k-mer one nucleotide to the right.
#
# prev_kmer: numeric encoding of the current k-mer (as built by encode_kmer).
# new_nucl:  the nucleotide entering the window, as a one-character string.
# k:         length of the k-mer.
# Returns the numeric encoding of the next k-mer.
encode_next_kmer <- function(prev_kmer, new_nucl, k) {
  # Drop the leading nucleotide by keeping the value modulo 4^(k - 1).
  # bitwAnd() is avoided because it is limited to 32-bit integers and so
  # cannot mask encodings of 2^31 or more (reachable from k = 16).
  kept <- prev_kmer %% 4^(k - 1)
  # Make room for one more base-4 digit and append the new nucleotide.
  kept * 4 + encode_nucl(new_nucl)
}
# Slide the window over "GATTACA" by appending "T".
encode_next_kmer(prev_kmer = enco_gattaca, new_nucl = "T", k = 7)
# Help lookups made during the session.
??yield
?yield
??"flip one bit"
# Toggle one bit of an integer.
#
# x: the integer whose bit is flipped.
# n: 0-based position of the bit, counted from the least significant bit.
# Returns x with bit n inverted.
flip_bit <- function(x, n) {
  # Mask with only bit n set.
  bit_mask <- bitwShiftL(1, n)
  # XOR with the mask inverts that bit and leaves every other bit untouched.
  bitwXor(x, bit_mask)
}
# Flip bit 1 (0-based, i.e. the 2nd bit from the right) of 5 (binary: 101)
x <- 5 # binary: 101
n <- 1 # 0-based bit index; bit 1 of 5 is currently 0
flipped_x <- flip_bit(x, n)
cat("Original number:", x, "\n") # Output: 5
cat("After flipping the 2nd bit:", flipped_x, "\n") # Output: 7 (binary: 111)
# Same experiment on 234 with bit 4; the printed label below still says
# "2nd bit" although n is 4 here.
x <- 234 # binary: 11101010
n <- 4 # 0-based bit index; bit 4 of 234 is currently 0
flipped_x <- flip_bit(x, n)
cat("Original number:", x, "\n") # Output: 234
cat("After flipping the 2nd bit:", flipped_x, "\n") # Output: 250 (binary: 11111010)
# NOTE(review): flip_bit() is called without `n` here — presumably this errored.
bitwXor(345, flip_bit(345))
# XOR of a value with its bit-3-flipped version: only bit 3 differs, giving 8.
bitwXor(345, flip_bit(345, 3))
a <- 345 # initial integer
a <- 345 # initial integer
flipa3 <- flip_bit(345, 3) # the same integer with bit 3 flipped
# Bits that differ between the hash of a and the hash of its flipped version.
bitwXor(hash_R(a), hash_R(flipa3))
hash_R(a)
hash_R(flipa3)
# NOTE(review): lambda is only assigned on the next line — presumably this
# first call failed.
which(lambda != 0)
lambda <- c(NA, NA, 1, 0)
# which() skips NA, so only index 3 (the non-zero, non-NA entry) is returned.
which(lambda != 0)
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,12 @@
*.exe
*.out
*.app
.Rproj.user

# RStudio project files and macOS artifacts
.DS_Store
*.Rproj*
**/.Rproj*

# build
build
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ add_compile_options(-Wall -Wextra -pedantic)
# Clang
#add_compile_options( -Wno-c++98-compat-pedantic -Wno-old-style-cast -Wno-padded -Wno-extra-semi-stmt -Wno-weak-vtables)


if(BUILD_FOR_LOCAL)
set(CMAKE_BUILD_WITH_INSTALL_RPATH ON)
endif()
Expand Down
35 changes: 35 additions & 0 deletions data_exploration/hash_functions.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Hash function.
// Mixes `val` through a fixed sequence of multiply, xor-shift and add-shift
// steps; the two masks keep the result within the low 31 bits.
// The order of the steps matters: inverted_hash undoes them in reverse.
template <typename myuint>
myuint hash(myuint val)
{
    val = val * 9;
    val = val ^ (val << 17);
    val = val ^ (val << 5);
    val = val + (val << 19);
    val = val & 0x7fffffff;   // keep the low 31 bits
    val = val ^ (val >> 3);
    val = val * 9;
    return val & 0x7fffffff;  // keep the low 31 bits
}

// Inverted function.
// Undoes the steps of hash() in reverse order:
//   - 954437177 * 9 == 2^33 + 1, so multiplying by it cancels a "*= 9";
//   - the chained right shifts (3, 6, 12, 24) cancel "val ^= val >> 3";
//   - 2146959361 == 2^31 - 2^19 + 1 cancels "val += val << 19" modulo 2^31;
//   - the chained left shifts (5, 10, 20) cancel "val ^= val << 5";
//   - "val ^= val << 17" is its own inverse on 31 bits.
template <typename myuint>
myuint inverted_hash(myuint val)
{
    val = val * 954437177;
    val = val & 0x7fffffff;
    val = val ^ (val >> 3);
    val = val ^ (val >> 6);
    val = val ^ (val >> 12);
    val = val ^ (val >> 24);
    val = val * 2146959361;
    val = val ^ (val << 5);
    val = val ^ (val << 10);
    val = val ^ (val << 20);
    val = val ^ (val << 17);
    val = val & 0x7fffffff;
    val = val * 954437177;
    return val & 0x7fffffff;
}
45 changes: 45 additions & 0 deletions data_exploration/hash_functions_rcpp.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#include <Rcpp.h>
#include <cstdint> // pour uint32_t

// Hash function on 32-bit words.
// Applies a fixed sequence of multiply, xor-shift and add-shift steps; the
// two masks keep the result within the low 31 bits.
uint32_t hash(uint32_t val) {
    const uint32_t low31 = 0x7fffffff;

    val = val * 9;
    val = val ^ (val << 17);
    val = val ^ (val << 5);
    val = val + (val << 19);
    val = val & low31;
    val = val ^ (val >> 3);
    val = val * 9;
    return val & low31;
}

// Inverted hash function on 32-bit words.
// Undoes the steps of hash() in reverse order; each multiplier is the
// modular inverse of the matching forward step (954437177 * 9 == 2^33 + 1,
// 2146959361 == 2^31 - 2^19 + 1), and each xor-shift chain doubles the shift
// until it exceeds the 31-bit width.
uint32_t inverted_hash(uint32_t val) {
    const uint32_t low31 = 0x7fffffff;
    const uint32_t inverse_of_9 = 954437177;
    const uint32_t inverse_of_add_shift_19 = 2146959361;

    val = val * inverse_of_9;
    val = val & low31;
    val = val ^ (val >> 3);
    val = val ^ (val >> 6);
    val = val ^ (val >> 12);
    val = val ^ (val >> 24);
    val = val * inverse_of_add_shift_19;
    val = val ^ (val << 5);
    val = val ^ (val << 10);
    val = val ^ (val << 20);
    val = val ^ (val << 17);
    val = val & low31;
    val = val * inverse_of_9;
    return val & low31;
}

// Expose the functions to R using Rcpp
// [[Rcpp::export]]
uint32_t hash_R(uint32_t val) {
return hash(val);
}

// inverted_hash_R forwards its argument to inverted_hash() and returns the
// result unchanged.
// [[Rcpp::export]]
uint32_t inverted_hash_R(uint32_t val) {
    const uint32_t restored = inverted_hash(val);
    return restored;
}
91 changes: 91 additions & 0 deletions data_exploration/read.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
library(Rcpp)

# Load the C++ file (replace the path with that of your .cpp file)
sourceCpp("hash_functions_rcpp.cpp")

# Example usage of the functions
val <- 12345

# Round trip: hash the value, then apply the inverse hash to the result.
hashed_value <- hash_R(val)
inverted_value <- inverted_hash_R(hashed_value)

# Printed labels are in French ("hashed value" / "inverted value").
cat("Valeur hashée :", hashed_value, "\n")
cat("Valeur inversée :", inverted_value, "\n")


# Sample nucleotide sequence used by the encoding helpers below.
x <- "ATTACA"

#' Translate a nucleotide string into numeric codes.
#'
#' Each character is mapped with A = 0, T = 1, G = 2, C = 3.
#'
#' @param input A single character string made of the letters A, T, G, C.
#' @return A numeric vector with one code per character of `input`, in order
#'   (length 0 for an empty string).
babytransfo <- function(input) {
  nucl_codes <- c(A = 0, T = 1, G = 2, C = 3)
  chars <- strsplit(input, "")[[1]]

  # A per-character switch() gives NULL for an unknown letter, which used to
  # surface as an obscure "replacement has length zero" error; report the
  # offending characters explicitly instead.
  unknown <- setdiff(chars, names(nucl_codes))
  if (length(unknown) > 0) {
    stop(
      "Unknown nucleotide(s): ", paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }

  # Vectorised lookup by name; drop the names so only the codes are returned.
  unname(nucl_codes[chars])
}

#' Encode a single nucleotide as a numeric code.
#'
#' @param nucl A one-character string: "A", "T", "G" or "C".
#' @return 0 for A, 1 for T, 2 for G, 3 for C.
#' @details Any other string raises an error.
encode_nucl <- function(nucl) {
  switch(nucl,
    "A" = 0,
    "T" = 1,
    "G" = 2,
    "C" = 3,
    # Without a default branch switch() returns NULL invisibly, and adding
    # NULL to a number gives numeric(0), so a bad character would silently
    # empty the whole k-mer encoding. Fail loudly instead.
    stop(
      "Unknown nucleotide: '", nucl, "' (expected A, T, G or C)",
      call. = FALSE
    )
  )
}

#' Encode a k-mer string as a base-4 number.
#'
#' @param seq_k A character string; only its first element is split into
#'   single nucleotides and encoded.
#' @return The numeric encoding, most significant nucleotide first
#'   (0 for an empty string).
encode_kmer <- function(seq_k) {
  nucleotides <- strsplit(seq_k, "")[[1]]
  # Fold left to right starting from 0: multiply the running value by 4 and
  # add the code of the next nucleotide.
  Reduce(
    function(acc, nucl) acc * 4 + encode_nucl(nucl),
    nucleotides,
    0
  )
}

# Base-4 encoding of "GATTACA" with A = 0, T = 1, G = 2, C = 3 (= 8524).
enco_gattaca <- encode_kmer(seq_k = "GATTACA")

#' Slide an encoded k-mer one nucleotide to the right.
#'
#' @param prev_kmer Numeric encoding of the current k-mer, as produced by
#'   `encode_kmer()`.
#' @param new_nucl The nucleotide entering the window (one-character string).
#' @param k Length of the k-mer.
#' @return Numeric encoding of the k-mer made of the last `k - 1` nucleotides
#'   of the previous one followed by `new_nucl`.
encode_next_kmer <- function(prev_kmer, new_nucl, k) {
  # Keep the last k - 1 nucleotides. Modulo is used rather than bitwAnd()
  # because bitwAnd() works on 32-bit integers and cannot hold encodings of
  # 2^31 or more (reachable from k = 16); `%%` works directly on doubles.
  suffix <- prev_kmer %% 4^(k - 1)
  # Shift by one base-4 digit and append the incoming nucleotide.
  suffix * 4 + encode_nucl(new_nucl)
}

# Slide the window one nucleotide to the right: "GATTACA" + "T" -> "ATTACAT"
# (expected encoding: 1329).
encode_next_kmer(
prev_kmer = enco_gattaca,
new_nucl = "T",
k = 7)


#' Toggle one bit of an integer.
#'
#' @param x The integer whose bit is flipped.
#' @param n 0-based position of the bit, counted from the least significant
#'   bit.
#' @return `x` with bit `n` inverted.
flip_bit <- function(x, n) {
  # Mask with only bit n set.
  bit_mask <- bitwShiftL(1, n)
  # XOR with the mask inverts that bit and leaves every other bit untouched.
  bitwXor(x, bit_mask)
}


# Flip bit 4 (0-based, counted from the least significant bit) of 234.
x <- 234 # binary: 11101010
n <- 4 # 0-based bit index; bit 4 of 234 is currently 0
flipped_x <- flip_bit(x, n)

cat("Original number:", x, "\n") # Output: 234
# The label now reports the bit index actually used instead of a fixed
# "2nd bit". Output: 250 (binary: 11111010)
cat("After flipping bit", n, ":", flipped_x, "\n")

a <- 345 # initial integer
# Derive the flipped value from `a` so both stay in sync if `a` changes.
flipa3 <- flip_bit(a, 3) # the same integer with bit 3 flipped

# XOR of the two hashes: the set bits are the hash bits that differ between
# `a` and its flipped version.
bitwXor(hash_R(a), hash_R(flipa3))

# The individual matrix captures the link between a hashed integer (hasha)
# and the same integer flipped and then hashed (hashaflippé). Row by row,
# for each bit of hasha, a 1 is placed where that bit matches a bit of
# hashaflippé.

# Goal: for every possible a, build the link matrices with every possible
# hashaflippé, and deduce from them whether the hashaflippés are indeed as
# far as possible from hasha.




2 changes: 1 addition & 1 deletion external/clutchlog
Submodule clutchlog updated 216 files

0 comments on commit 2fa6af7

Please sign in to comment.