diff --git a/.Rhistory b/.Rhistory new file mode 100644 index 0000000..48e270f --- /dev/null +++ b/.Rhistory @@ -0,0 +1,125 @@ +library(Rcpp) +setwd("~/git/rHashGen/data_exploration") +# Load the C++ file (replace the path with that of the .cpp file) +sourceCpp("hash_functions_rcpp.cpp") +# Example usage of the functions +val <- 12345 +hashed_value <- hash_R(val) +inverted_value <- inverted_hash_R(hashed_value) +cat("Valeur hashée :", hashed_value, "\n") +cat("Valeur inversée :", inverted_value, "\n") +x <- "ATTAC" +input <- "ATTACA" +inpulist <- strsplit(input, "")[[1]] +inpulist +?switch +i = 1 +transfo <- function(input) { +inpulist <- strsplit(input, "")[[1]] +out <- rep(NA, length(inpulist)) +for (i in seq_along(inpulist)) { +charac.i <- inputlist[[i]] +out[i] <- switch(charac.i, A = 0, T = 1, G = 2, C = 3) +} +return(out) +} +transfo(x) +transfo <- function(input) { +inputlist <- strsplit(input, "")[[1]] +out <- rep(NA, length(inputlist)) +for (i in seq_along(inputlist)) { +charac.i <- inputlist[[i]] +out[i] <- switch(charac.i, A = 0, T = 1, G = 2, C = 3) +} +return(out) +} +transfo(x) +31/7 +31/11 +31/13 +31/2 +31/3 +31/5 +31/7 +encode_nucl <- function(nucl) { +return(switch(nucl, A = 0, T = 1, G = 2, C = 3)) +} +encode_kmer <- function(seq_k) { +encoded_kmer <- 0 +for (nucl in seq_k) { +encoded_kmer <- encoded_kmer * 4 + encode_nucl(nucl) +} +return(encoded_kmer) +} +encode_kmer("GATTACA") +encode_kmer <- function(seq_k) { +seq_k <- unlist(strsplit(seq_k, "")[[1]]) +encoded_kmer <- 0 +for (nucl in seq_k) { +encoded_kmer <- encoded_kmer * 4 + encode_nucl(nucl) +} +return(encoded_kmer) +} +encode_nucl <- function(nucl) { +return(switch(nucl, "A" = 0, "T" = 1, "G" = 2, "C" = 3)) +} +encode_kmer <- function(seq_k) { +seq_k <- unlist(strsplit(seq_k, "")[[1]]) +encoded_kmer <- 0 +for (nucl in seq_k) { +encoded_kmer <- encoded_kmer * 4 + encode_nucl(nucl) +} +return(encoded_kmer) +} +encode_kmer("GATTACA") +3 & 4 +??bitwise +bitAnd(3, 2) 
+bitops::bitAnd(3, 2) +encode_next_kmer <- function(prev_kmer, new_nucl, k) { +encoded_kmer <- prev_kmer +encoded_kmer <- bitwAnd(encoded_kmer, 4**(k-1) - 1) +encoded_kmer <- encoded_kmer * 4 + encode_nucl(new_nucl) +return(encoded_kmer) +} +enco_gattaca <- encode_kmer(seq_k = "GATTACA") +encode_next_kmer <- function(prev_kmer, new_nucl, k) { +encoded_kmer <- prev_kmer +encoded_kmer <- bitwAnd(encoded_kmer, 4**(k-1) - 1) +encoded_kmer <- encoded_kmer * 4 + encode_nucl(new_nucl) +return(encoded_kmer) +} +encode_next_kmer(prev_kmer = enco_gattaca, new_nucl = "T", k = 7) +??yield +?yield +??"flip one bit" +flip_bit <- function(x, n) { +# n is the bit position (0-based index) +# bitwShiftL(1, n) creates a mask where only the nth bit is set to 1 +# bitwXor flips that specific bit in x +flipped_x <- bitwXor(x, bitwShiftL(1, n)) +return(flipped_x) +} +# Let's flip the 2nd bit (0-based index 1) of the number 5 (binary: 101) +x <- 5 # binary: 101 +n <- 1 # 0-based index 1, i.e. the 2nd bit (currently 0 in 101) +flipped_x <- flip_bit(x, n) +cat("Original number:", x, "\n") # Output: 5 +cat("After flipping the 2nd bit:", flipped_x, "\n") # Output: 7 (binary: 111) +# Let's flip the 5th bit (0-based index 4) of the number 234 (binary: 11101010) +x <- 234 # binary: 11101010 +n <- 4 # 0-based index 4, i.e. the 5th bit (currently 0 in 11101010) +flipped_x <- flip_bit(x, n) +cat("Original number:", x, "\n") # Output: 234 +cat("After flipping the 2nd bit:", flipped_x, "\n") # Output: 250 (binary: 11111010); NOTE: the printed label still says "2nd bit" +bitwXor(345, flip_bit(345)) +bitwXor(345, flip_bit(345, 3)) +a <- 345 # initial integer +a <- 345 # initial integer +flipa3 <- flip_bit(345, 3) # same integer with bit 3 flipped +bitwXor(hash_R(a), hash_R(flipa3)) +hash_R(a) +hash_R(flipa3) +which(lambda != 0) +lambda <- c(NA, NA, 1, 0) +which(lambda != 0) diff --git a/.gitignore b/.gitignore index 259148f..98bbe36 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,12 @@ *.exe *.out *.app +.Rproj.user + +# Rproj and macOS files +.DS_Store +*.Rproj* +**/.Rproj* + +# build +build diff --git a/CMakeLists.txt 
b/CMakeLists.txt index beec0b5..58297bb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,6 +23,7 @@ add_compile_options(-Wall -Wextra -pedantic) # Clang #add_compile_options( -Wno-c++98-compat-pedantic -Wno-old-style-cast -Wno-padded -Wno-extra-semi-stmt -Wno-weak-vtables) + if(BUILD_FOR_LOCAL) set(CMAKE_BUILD_WITH_INSTALL_RPATH ON) endif() diff --git a/data_exploration/hash_functions.cpp b/data_exploration/hash_functions.cpp new file mode 100644 index 0000000..9a58381 --- /dev/null +++ b/data_exploration/hash_functions.cpp @@ -0,0 +1,35 @@ +//Hash function +template +myuint hash(myuint val) +{ + val *= 9; + val ^= val << 17; + val ^= val << 5; + val += val << 19; + val &= 0x7fffffff; + val ^= val >> 3; + val *= 9; + val &= 0x7fffffff; + return val; +} + +//Inverted function: +template +myuint inverted_hash(myuint val) +{ + val *= 954437177; + val &= 0x7fffffff; + val ^= val >> 3; + val ^= val >> 6; + val ^= val >> 12; + val ^= val >> 24; + val *= 2146959361; + val ^= val << 5; + val ^= val << 10; + val ^= val << 20; + val ^= val << 17; + val &= 0x7fffffff; + val *= 954437177; + val &= 0x7fffffff; + return val; +} \ No newline at end of file diff --git a/data_exploration/hash_functions_rcpp.cpp b/data_exploration/hash_functions_rcpp.cpp new file mode 100644 index 0000000..3e37694 --- /dev/null +++ b/data_exploration/hash_functions_rcpp.cpp @@ -0,0 +1,45 @@ +#include +#include // for uint32_t + +// Hash function +uint32_t hash(uint32_t val) { + val *= 9; + val ^= val << 17; + val ^= val << 5; + val += val << 19; + val &= 0x7fffffff; + val ^= val >> 3; + val *= 9; + val &= 0x7fffffff; + return val; +} + +// Inverted hash function +uint32_t inverted_hash(uint32_t val) { + val *= 954437177; + val &= 0x7fffffff; + val ^= val >> 3; + val ^= val >> 6; + val ^= val >> 12; + val ^= val >> 24; + val *= 2146959361; + val ^= val << 5; + val ^= val << 10; + val ^= val << 20; + val ^= val << 17; + val &= 0x7fffffff; + val *= 954437177; + val &= 0x7fffffff; + return val; 
+} + +// Expose the functions to R using Rcpp +// [[Rcpp::export]] +uint32_t hash_R(uint32_t val) { + return hash(val); +} + +// [[Rcpp::export]] +uint32_t inverted_hash_R(uint32_t val) { + return inverted_hash(val); +} diff --git a/data_exploration/read.R b/data_exploration/read.R new file mode 100644 index 0000000..49259be --- /dev/null +++ b/data_exploration/read.R @@ -0,0 +1,91 @@ +library(Rcpp) + +# Load the C++ file (replace the path with that of the .cpp file) +sourceCpp("hash_functions_rcpp.cpp") + +# Example usage of the functions +val <- 12345 + +hashed_value <- hash_R(val) +inverted_value <- inverted_hash_R(hashed_value) + +cat("Valeur hashée :", hashed_value, "\n") +cat("Valeur inversée :", inverted_value, "\n") + + +x <- "ATTACA" + +babytransfo <- function(input) { + inputlist <- strsplit(input, "")[[1]] + out <- rep(NA, length(inputlist)) + for (i in seq_along(inputlist)) { + charac.i <- inputlist[[i]] + out[i] <- switch(charac.i, A = 0, T = 1, G = 2, C = 3) + } + return(out) +} + +encode_nucl <- function(nucl) { + return(switch(nucl, "A" = 0, "T" = 1, "G" = 2, "C" = 3)) +} + +encode_kmer <- function(seq_k) { + seq_k <- unlist(strsplit(seq_k, "")[[1]]) + encoded_kmer <- 0 + for (nucl in seq_k) { + encoded_kmer <- encoded_kmer * 4 + encode_nucl(nucl) + } + return(encoded_kmer) +} + +enco_gattaca <- encode_kmer(seq_k = "GATTACA") + +encode_next_kmer <- function(prev_kmer, new_nucl, k) { + encoded_kmer <- prev_kmer + encoded_kmer <- bitwAnd(encoded_kmer, 4**(k-1) - 1) + encoded_kmer <- encoded_kmer * 4 + encode_nucl(new_nucl) + return(encoded_kmer) +} + +encode_next_kmer( + prev_kmer = enco_gattaca, + new_nucl = "T", + k = 7) + + +flip_bit <- function(x, n) { + # n is the bit position (0-based index) + # bitwShiftL(1, n) creates a mask where only the nth bit is set to 1 + # bitwXor flips that specific bit in x + flipped_x <- bitwXor(x, bitwShiftL(1, n)) + return(flipped_x) +} + + +# Let's flip bit 4 (0-based) of 234 (binary: 11101010), the same way one would flip bit 1 of 5 (binary: 
101) +x <- 234 # binary: 11101010 +n <- 4 # 0-based index 4, i.e. the 5th bit (currently 0 in 11101010) +flipped_x <- flip_bit(x, n) + +cat("Original number:", x, "\n") # Output: 234 +cat("After flipping the 2nd bit:", flipped_x, "\n") # Output: 250 (binary: 11111010); NOTE: the printed label still says "2nd bit" + +a <- 345 # initial integer +flipa3 <- flip_bit(345, 3) # same integer with bit 3 flipped + +bitwXor(hash_R(a), hash_R(flipa3)) + +# The individual matrix captures the link +# between a hashed integer (hasha) and the same integer +# flipped then hashed (hashaflipped). Row by row, for each +# bit of hasha, a 1 is placed where that bit +# corresponds to a bit of hashaflipped. + +# Goal of the operation: for every possible a, +# build the link matrices with all possible hashaflipped +# values and deduce whether the hashaflipped values are indeed +# as far as possible from hasha. + + + + diff --git a/external/clutchlog b/external/clutchlog index 28f50d0..7d0252b 160000 --- a/external/clutchlog +++ b/external/clutchlog @@ -1 +1 @@ -Subproject commit 28f50d0badd8471e8ae3221ecd2ac9338ffa9bd3 +Subproject commit 7d0252b659aa989ff71ff80aacfa08d389b46a10 diff --git a/external/paradiseo b/external/paradiseo index 8ea6e2b..51be7e3 160000 --- a/external/paradiseo +++ b/external/paradiseo @@ -1 +1 @@ -Subproject commit 8ea6e2b680d00f82655584fcc199f90803f00fa2 +Subproject commit 51be7e324b07c90d9b1b4b93c7fa9f8d668f76b5