diff --git a/Makefile b/Makefile
index e7c54b6..2e3a1e6 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
-all: satc satc_dump satc_merge sig_anch download_kmc nomad
+all: satc satc_dump satc_merge sig_anch download_kmc splash supervised_test
 
-NOMAD_LIBS_DIR = libs
+SPLASH_LIBS_DIR = libs
 LIBS_DIR = . #/usr/local/lib
 INCLUDE_DIR= libs
 MIMALLOC_INLUCDE_DIR = libs/mimalloc/include
@@ -14,14 +14,16 @@ COMMON_DIR=src/common
 OUT_BIN_DIR=bin
 
 CC 	= g++
-CFLAGS	= -fPIC -Wall -O3 -m64 -std=c++17 -mavx -pthread -I $(INCLUDE_DIR) -I $(MIMALLOC_INLUCDE_DIR) -fpermissive
-CLINK	= -lm -std=c++17 -lpthread -static-libstdc++ -lgfortran
+CFLAGS	= -fPIC -Wall -O3 -m64 -std=c++17 -pthread -I $(INCLUDE_DIR) -I $(MIMALLOC_INLUCDE_DIR) -fpermissive
+CLINK	= -lm -std=c++17 -lpthread -static-libstdc++
 
-release: CLINK	= -lm -std=c++17 -static -lgfortran -lquadmath -Wl,--whole-archive -lpthread -Wl,--no-whole-archive
-release: CFLAGS	= -fPIC -Wall -O3 -DNDEBUG -m64 -std=c++17 -mavx -pthread -I $(INCLUDE_DIR) -I $(MIMALLOC_INLUCDE_DIR) -fpermissive
+MIMALLOC_OBJ=libs/mimalloc/mimalloc.o
+
+release: CLINK	= -lm -std=c++17 -static -Wl,--whole-archive -lpthread -Wl,--no-whole-archive
+release: CFLAGS	= -fPIC -Wall -O3 -DNDEBUG -m64 -std=c++17 -pthread -I $(INCLUDE_DIR) -I $(MIMALLOC_INLUCDE_DIR) -fpermissive
 release: all
 
-debug: CFLAGS	= -fPIC -Wall -O0 -g -m64 -std=c++17 -mavx -pthread -I $(INCLUDE_DIR) -I $(MIMALLOC_INLUCDE_DIR) -fpermissive
+debug: CFLAGS	= -fPIC -Wall -O0 -g -m64 -std=c++17 -pthread -I $(INCLUDE_DIR) -I $(MIMALLOC_INLUCDE_DIR) -fpermissive
 debug: all
 
 ifdef MSVC     # Avoid the MingW/Cygwin sections
@@ -47,18 +49,22 @@ prefix      = /usr/local
 # optional install location
 exec_prefix = $(prefix)
 
+$(MIMALLOC_OBJ):
+	$(CC) -DMI_MALLOC_OVERRIDE -O3 -DNDEBUG -fPIC -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden -ftls-model=initial-exec -fno-builtin-malloc -c -I libs/mimalloc/include libs/mimalloc/src/static.c -o $(MIMALLOC_OBJ)
+
 %.o: %.cpp
 	$(CC) $(CFLAGS) -c $< -o $@
 
 satc: $(OUT_BIN_DIR)/satc
 
 $(OUT_BIN_DIR)/satc: $(SATC_MAIN_DIR)/satc.o \
-	$(SATC_MAIN_DIR)/kmc_api/kmc_file.o \
-	$(SATC_MAIN_DIR)/kmc_api/mmer.o \
-	$(SATC_MAIN_DIR)/kmc_api/kmer_api.o
+	$(COMMON_DIR)/kmc_api/kmc_file.o \
+	$(COMMON_DIR)/kmc_api/mmer.o \
+	$(COMMON_DIR)/kmc_api/kmer_api.o \
+	$(COMMON_DIR)/illumina_adapters_static.o
 	-mkdir -p $(OUT_BIN_DIR)
 	$(CC) -o $@ $^ \
-	$(NOMAD_LIBS_DIR)/$(LIB_ZSTD) \
+	$(SPLASH_LIBS_DIR)/$(LIB_ZSTD) \
 	$(CLINK)
 
 satc_merge: $(OUT_BIN_DIR)/satc_merge
@@ -69,7 +75,7 @@ $(OUT_BIN_DIR)/satc_merge: $(SATC_MERGE_MAIN_DIR)/satc_merge.o \
 	$(SATC_MERGE_MAIN_DIR)/extra_stats.o
 	-mkdir -p $(OUT_BIN_DIR)
 	$(CC) -o $@ $^ \
-	$(NOMAD_LIBS_DIR)/$(LIB_ZSTD) \
+	$(SPLASH_LIBS_DIR)/$(LIB_ZSTD) \
 	$(CLINK)
 
 satc_dump: $(OUT_BIN_DIR)/satc_dump
@@ -77,7 +83,7 @@ satc_dump: $(OUT_BIN_DIR)/satc_dump
 $(OUT_BIN_DIR)/satc_dump: $(SATC_DUMP_MAIN_DIR)/satc_dump.o
 	-mkdir -p $(OUT_BIN_DIR)
 	$(CC) -o $@ $^ \
-	$(NOMAD_LIBS_DIR)/$(LIB_ZSTD) \
+	$(SPLASH_LIBS_DIR)/$(LIB_ZSTD) \
 	$(CLINK)
 
 sig_anch: $(OUT_BIN_DIR)/sig_anch
@@ -92,24 +98,26 @@ download_kmc:
 	-mkdir -p $(OUT_BIN_DIR)
 	./download_kmc.sh $(OUT_BIN_DIR)
 
-nomad:
-	cp src/nomad.py bin/nomad
+splash:
+	cp src/splash.py bin/splash
+supervised_test:
+	cp src/supervised_test/supervised_test.R bin
 
 install: all
 	install bin/* /usr/local/bin
 
 uninstall:
-	-rm /usr/local/bin/satc 
+	-rm /usr/local/bin/satc
 	-rm /usr/local/bin/satc_dump
 	-rm /usr/local/bin/satc_merge
-	-rm /usr/local/bin/sig_anch 
-	-rm /usr/local/bin/nomad
+	-rm /usr/local/bin/sig_anch
+	-rm /usr/local/bin/splash
 	-rm /usr/local/bin/kmc
 	-rm /usr/local/bin/kmc_tools
 
-clean:	
+clean:
 	-rm $(SATC_MAIN_DIR)/*.o
-	-rm $(SATC_MAIN_DIR)/kmc_api/*.o	
+	-rm $(COMMON_DIR)/kmc_api/*.o
 	-rm $(SATC_MERGE_MAIN_DIR)/*.o
 	-rm $(SATC_DUMP_MAIN_DIR)/*.o
 	-rm $(COMMON_DIR)/*.o
diff --git a/NOMAD_extendor_classification.R b/NOMAD_extendor_classification.R
index 475949b..1eb0c2e 100644
--- a/NOMAD_extendor_classification.R
+++ b/NOMAD_extendor_classification.R
@@ -41,7 +41,7 @@ riffle <- function(a, b) {   # this function interleaves the elements of two vec
 ##################################################
 ############ input arguments #####################
 args <- commandArgs(trailingOnly = TRUE)
-directory = args[1]  # the output directory used for the NOMAD run
+directory = args[1]  # the output directory used for the SPLASH run
 which_anchors_file = args[2]  # flag to decide which anchor file (after correction or all anchors) to use, could be "after_correction" or "all" 
 effect_size_cutoff = args[3] # the effect size cutoff for significant anchors (default 0.2) 
 num_samples_cutoff = args[4] # the minimum number of sampels for an anchor to be called (default 20)
diff --git a/README.md b/README.md
index ceec790..32ae75f 100644
--- a/README.md
+++ b/README.md
@@ -1,44 +1,45 @@
-# NOMAD 2.0
+# SPLASH 2
 ## Introduction
-NOMAD is an unsupervised and reference-free unifying framework to discover regulated sequence variation through statistical analysis of k-mer composition in both DNA and RNA sequence. 
-NOMAD leverages our observation that detecting sample-regulated sequence variation, such as alternative splicing, RNA editing, gene fusions, V(D)J, transposable element mobilization, allele-specific splicing, genetic variation in a population, and many other regulated events can be unified–in theory and in practice.
-This is achieved with a simple model, NOMAD, that analyzes k-mer composition of raw sequencing reads (Chaung et al. 2022). 
-NOMAD finds constant sequences (anchors) that are followed by a set of sequences (targets) with sample-specific target variation and provides valid p-values. 
-NOMAD is reference-free, sidestepping the computational challenges associated with alignment and making it significantly faster and more efficient than alignment, and enabling discovery and statistical precision not currently available, even from pseudo-alignment.
+SPLASH is an unsupervised and reference-free unifying framework to discover regulated sequence variation through statistical analysis of k-mer composition in both DNA and RNA sequence. 
+SPLASH leverages our observation that detecting sample-regulated sequence variation, such as alternative splicing, RNA editing, gene fusions, V(D)J, transposable element mobilization, allele-specific splicing, genetic variation in a population, and many other regulated events can be unified–in theory and in practice.
+This is achieved with a simple model, SPLASH, that analyzes k-mer composition of raw sequencing reads (Chaung et al. 2022). 
+SPLASH finds constant sequences (anchors) that are followed by a set of sequences (targets) with sample-specific target variation and provides valid p-values. 
+SPLASH is reference-free, sidestepping the computational challenges associated with alignment and making it significantly faster and more efficient than alignment, and enabling discovery and statistical precision not currently available, even from pseudo-alignment.
 
-The first version of [NOMAD](https://github.com/salzman-lab/nomad/) pipeline proved its usefullness.
+The first version of [SPLASH](https://github.com/salzman-lab/splash/) pipeline proved its usefullness.
 It was implemented mainly in Python with the use of NextFlow.
 Here we provide a new and improved implementation based in C++ and Python (Kokot et al. 2023).
 This new version is much more efficient and allows for the analysis of datasets >1TB size in hours on a workstation or even a laptop.
 
 ## How does it work
 
-A key concept of NOMAD is the analysis of composition of pairs of substrings *anchor*&ndash;*target* across many samples.
+A key concept of SPLASH is the analysis of composition of pairs of substrings *anchor*&ndash;*target* across many samples.
 The substrings can be adjacent in reads or can be separated by a *gap*.
 
-The image below presents the NOMAD pipeline on a high-level.
+The image below presents the SPLASH pipeline on a high-level.
+![image](https://github.com/refresh-bio/SPLASH/assets/9378882/8210fee0-c877-4374-9938-e3c01ea69e76)
 
-![image](https://user-images.githubusercontent.com/9378882/225988504-70266e4d-37e0-4c85-8c95-e47ad208cda9.png)
+<!-- ![image](https://user-images.githubusercontent.com/9378882/225988504-70266e4d-37e0-4c85-8c95-e47ad208cda9.png) -->
 
 <!-- ![image](https://user-images.githubusercontent.com/9378882/224449978-309a4708-0fa1-4cb8-8483-a32e36ec2d58.png) -->
 
 ## Installation
 ### Precompiled binaries
-The easiest way to get nomad is to use [precompiled release](https://github.com/refresh-bio/NOMAD/releases).
-To get the version 2.0.0 and run the example is is sufficient to do:
+The easiest way to get SPLASH is to use [precompiled release](https://github.com/refresh-bio/SPLASH/releases).
+To get the version 2.1.4 and run the example is is sufficient to do:
 ```
-curl -L https://github.com/refresh-bio/NOMAD/releases/download/v2.0.0/nomad-2.0.0.linux.x64.tar.gz | tar xz
+curl -L https://github.com/refresh-bio/SPLASH/releases/download/v2.1.4/splash-2.1.4.linux.x64.tar.gz | tar xz
 cd example
 ./run-example.sh
 ```
 ### Compile from sources
-NOMAD is implemented as a number of applications written in the C++ programming language and a Python wrapper to run the whole pipeline.
+SPLASH is implemented as a number of applications written in the C++ programming language and a Python wrapper to run the whole pipeline.
 Currently the software may be used only under Linux. 
 A compiler supporting C++17 is needed to compile the code.
-Use following snippet to install NOMAD.
+Use following snippet to install SPLASH.
 ```
-git clone https://github.com/refresh-bio/nomad
-cd nomad
+git clone https://github.com/refresh-bio/splash
+cd splash
 make -j
 sudo make install
 ```
@@ -47,7 +48,7 @@ To verify the installation on small example one may perform:
 ```
 cd example
 ./download.py #download examplary data
-nomad input.txt #run the pipeline with default parameters
+splash input.txt #run the pipeline with default parameters
 ```
 The result consists of two TSV files, namely,
  1. `result.after_correction.all_anchors.tsv` 
@@ -57,7 +58,7 @@ The second file contains only anchors whose corrected p-value is below 0.05.
 
 ## Inputs
 There is a lot of parameters allowing to customize the pipeline. They can be grouped into several categories. 
-The parameters will be displayed when running nomad without parameters (or with `--help`).
+The parameters will be displayed when running splash without parameters (or with `--help`).
  
 ### Input reads parameters:
 * `--input_file` &mdash;
@@ -68,10 +69,14 @@ The parameters will be displayed when running nomad without parameters (or with
  
 ### Anchor filtering parameters:
 * `--poly_ACGT_len` &mdash; filter out all anchors containing poly(ACGT) of length at least <poly_ACGT_len> (0 means no filtering) (default: 8)
+* `--artifacts` &mdash; path to artifacts, each anchor containing artifact will be filtered out (default: )
+* `--dont_filter_illumina_adapters` &mdash; if used anchors containing Illumina adapters will not be filtered out (default: False)
 * `--anchor_unique_targets_threshold` &mdash; filter out all anchors for which the number of unique targets is <= anchor_unique_targets_threshold (default: 1)
 * `--anchor_count_threshold` &mdash; filter out all anchors for which the total count <= anchor_count_threshold (default: 50)
 * `--anchor_samples_threshold` &mdash; filter out all anchors for which the number of unique samples is <= anchor_samples_threshold (default: 1)
 * `--anchor_sample_counts_threshold` &mdash; filter out anchor from sample if its count in this sample is <= anchor_sample_counts_threshold (default: 5)
+* `--n_most_freq_targets_for_stats` &mdash; use at most n_most_freq_targets_for_stats for each contignency table, 0 means use all (default: 0)
+* `--min_hamming_threshold` &mdash; keep only anchors with a pair of targets that differ by >= min_hamming_threshold (default: 0)
  
 ### Statistical significance parameters:
 * `--pvals_correction_col_name` &mdash; for which column correction should be applied (default: pval_opt)
@@ -79,18 +84,21 @@ The parameters will be displayed when running nomad without parameters (or with
  
 ### Reporting parameters:
 * `--outname_prefix` &mdash; prefix of output file names (default: result)
-* `--n_most_freq_targets_for_stats` &mdash; use at most n_most_freq_targets_for_stats for each contignency table, 0 means use all (default: 0)
 * `--dump_Cjs` &mdash; output Cjs (default: False)
 * `--max_pval_opt_for_Cjs` &mdash; dump only Cjs for anchors that have pval_opt <= max_pval_opt_for_Cjs (default: 0.1)
 * `--n_most_freq_targets` &mdash; number of most frequent tragets printed per each anchor in stats mode (default: 2)
 * `--with_effect_size_cts` &mdash; if set effect_size_cts will be computed (default: False)
+* `--with_pval_asymp_opt` &mdash; if set pval_asymp_opt will be computed (default: False)
 * `--dump_sample_anchor_target_count_txt` &mdash; if set contignency tables will be generated in text format (default: False)
 * `--dump_sample_anchor_target_count_binary` &mdash; if set contignency tables will be generated in binary (SATC) format, to convert to text format later `satc_dump` program may be used, it may take optionally mapping from id to sample_name (--sample_names param) (default: False)
-* `--dont_clean_up` &mdash; if set then intermediate files will not be removed (default: False)                                                                                                     
+* `--dump_sample_anchor_target_count_txt` &mdash; if set contignency tables will be generated in text format (default: False)
+* `dump_sample_anchor_target_count_binary` &mdash; if set contignency tables will be generated in binary (satc) format, to convert to text format later satc_dump program may be used, it may take optionally mapping from id to sample_name (--sample_names param) (default: False)
+* `--dont_clean_up` &mdash; if set then intermediate files will not be removed (default: False)
  
 ### Directory parameters:
-* `--sample_name_to_id` &mdash; file name with mapping sample name <-> sammpe id (default: sample_name_to_id.mapping.txt)                                                                                  * `--bin_path` &mdash; path to a directory where satc, satc_dump, satc_merge, sig_anch, kmc, kmc_tools binaries are (if any not found there nomad will check if installed and use installed) (default: ./)
-* `--tmp_dir` &mdash; path to a directory where temporary files will be stored (default: let nomad decide)
+* `--sample_name_to_id` &mdash; file name with mapping sample name <-> sammpe id (default: sample_name_to_id.mapping.txt)                                                                                 
+* `--bin_path` &mdash; path to a directory where satc, satc_dump, satc_merge, sig_anch, kmc, kmc_tools binaries are (if any not found there splash will check if installed and use installed) (default: ./)
+* `--tmp_dir` &mdash; path to a directory where temporary files will be stored (default: let splash decide)
 * `--logs_dir` &mdash; director where run logs of each thread will be stored (default: logs)          
  
 ### High Performance Computing parameters:
@@ -105,7 +113,9 @@ The parameters will be displayed when running nomad without parameters (or with
 * `--opt_num_inits` &mdash; the number of altMaximize random initializations (default: 10)
 * `--opt_num_iters` &mdash; the maximum number of iterations in altMaximize (default: 50)
 * `--num_rand_cf` &mdash; the number of random c and f used for pval_base (default: 50)
+* `--num_splits` &mdash; the number of contingency table splits (default: 1)
 * `--opt_train_fraction` &mdash; in calc_stats mode use this fraction to create training data from contingency table (default: 0.25)
+* `--without_alt_max` &mdash; if set int alt max and related stats will not be computed (default: False)
 
 ### 10X SC-RNAseq parameters:
 - `run_10X`
@@ -127,7 +137,7 @@ There are following columns in the resulting tsv files
 | anchor                            | anchor                                 |                                                                                        |
 | pval_opt                          | p-value from alternating maximization  | number of iterations and other parameters can be configured with switches. Optimization formulation and statistical exposition in (Baharav et al. 2023).             |
 | effect_size_bin                   | measure of anchor's effect size        | bounded between [0,1], indicates how well the data is divided between 2 groups of columns. 0 indicates no different between groups, 1 indicates two groups of columns with disjoint row supports (Baharav et al. 2023).                    |
-| pval_asymp_base                   | asymptotically valid p-value based on alternating maximization                  | compute optimizing c and f used for pval_opt, and evaluate approximate p-value given by asymptotic normality (Baharav et al. 2023). |
+| pval_asymp_opt                    | asymptotically valid p-value based on alternating maximization                  | compute optimizing c and f used for pval_opt, and evaluate approximate p-value given by asymptotic normality (Baharav et al. 2023). |
 | pval_base                         | p-value based on random partitionings      | Compute base p-value (using num_rand_cf random c,f), Bonferroni correction to yield valid p-value. |
 | M                                 | anchor count                           | number of anchor occurences in the input (= the sum of elements in contingency table)  |
 | anch_uniqTargs                    | number of unique targets for anchor    | (=number of rows in contingency table)                                                  |
@@ -156,16 +166,16 @@ Its format is one sample per line. Each line should contain the name of a sample
 
 ## Additional output
 ### Most frequent targets per each anchor
-By default NOMAD will store 2 most frequent targets per each anchor in the resulting TSV files. This should be sufficient for splicing, but for RNA editing/missmatches 4 may be a better choice. It may be set with `--n_most_freq_targets` switch. If the number of targets for a given anchor is lower than specified value there will be a single `-` for each missing target.
+By default SPLASH will store 2 most frequent targets per each anchor in the resulting TSV files. This should be sufficient for splicing, but for RNA editing/missmatches 4 may be a better choice. It may be set with `--n_most_freq_targets` switch. If the number of targets for a given anchor is lower than specified value there will be a single `-` for each missing target.
 ### SATC format
-NOMAD stores intermediate and optional output files in SATC format (**S**ample **A**nchor **T**arget **C**ount).
+SPLASH stores intermediate and optional output files in SATC format (**S**ample **A**nchor **T**arget **C**ount).
 ### Sample representation
-The unique id is assigned to each sample. The ids are consecutive numbers starting with 0. The first sample from the input file gets id 0, the second one gets 1, and so on. By default NOMAD will store the mapping in `sample_name_to_id.mapping.txt` file, but this may be redefined with `--sample_name_to_id` parameter. This mapping file may be useful to access the data stored in SATC format.
+The unique id is assigned to each sample. The ids are consecutive numbers starting with 0. The first sample from the input file gets id 0, the second one gets 1, and so on. By default SPLASH will store the mapping in `sample_name_to_id.mapping.txt` file, but this may be redefined with `--sample_name_to_id` parameter. This mapping file may be useful to access the data stored in SATC format.
 ### Output sample, anchor, target, count
 #### Textual
 When `--dump_sample_anchor_target_count_txt` switch is used there will be an additional output directory (named `result_dump` by default, but the `results` part may be redefined with `--outname_prefix` switch). This directory will contain a number of files (equal to the number of bins, default 128, may be redefined with `--n_bins` switch). The extension of these files is `.satc.dump`. Each line of these files is a tab-separated list of <sample_name> <anchor> <target> <count>. This is the easiest way to be able to reproduce contingency tables used during computation. Each file contains some number of anchors, but it is assured that a specific anchor is present in a single file. Since these text files could be large, it may be proficient to use binary (SATC) files instead.
 #### Binary
-When `--dump_sample_anchor_target_count_binary` switch is used there will be an additional output directory (named `result_satc` by default, but the `results` part may be redefined with `--outname_prefix` switch). This directory will contain a number of files (equal to the number of bins, default 128, may be redefined with `--n_bins` switch). The extension of these files is `.satc`. These are binary files in SATC format. Their content may be converted to textual representation with `satc_dump` program (part of the NOMAD package). Each file contains some number of anchors, but it is assured that a specific anchor is present in a single file. Since these text files could be large, it may be proficient to use binary (SATC) files instead, especially if one wants to investigate only some of all anchors.
+When `--dump_sample_anchor_target_count_binary` switch is used there will be an additional output directory (named `result_satc` by default, but the `results` part may be redefined with `--outname_prefix` switch). This directory will contain a number of files (equal to the number of bins, default 128, may be redefined with `--n_bins` switch). The extension of these files is `.satc`. These are binary files in SATC format. Their content may be converted to textual representation with `satc_dump` program (part of the SPLASH package). Each file contains some number of anchors, but it is assured that a specific anchor is present in a single file. Since these text files could be large, it may be proficient to use binary (SATC) files instead, especially if one wants to investigate only some of all anchors.
 ### satc_dump
 To convert SATC files into textual representation one may use `satc_dump` program. The simples usage is
 ```
@@ -177,10 +187,10 @@ There are also additional parameters that may be useful, namely:
  * `--n_bins <int>` &mdash; if set to value different than 0 the input is interpreted as a list of bins (each bin in separate line, first list is bin_0, second line is bin_1, etc. (in case of ill-formed input results will be incorrect)
  * `--separately` &mdash; if set with n_bins != 0 output param will be treated as suffix name and there will be output for each bin
  
- If `--sample_names` is not used in the output there will be sample ids instead of its names. NOMAD by default write mapping to `sample_name_to_id.mapping.txt` file (may be redefined with `--sample_name_to_id` switch of NOMAD.
+ If `--sample_names` is not used in the output there will be sample ids instead of its names. SPLASH by default write mapping to `sample_name_to_id.mapping.txt` file (may be redefined with `--sample_name_to_id` switch of SPLASH.
  
  ### Interpreting results
- `plotGeneration.py` provides basic functionality to visualize contingency tables. To use, run NOMAD with the --dump_sample_anchor_target_count_binary flag. Then, `plotGeneration.py` can be run with inputs:
+ `plotGeneration.py` provides basic functionality to visualize contingency tables. To use, run SPLASH with the --dump_sample_anchor_target_count_binary flag. Then, `plotGeneration.py` can be run with inputs:
 - `--dsName` &mdash; dataset name to be used in title of plots
 - `outFolder` &mdash; path to save files to
 - `metadataPath` &mdash; path to .tsv file containing two columns titled `sampleName` and `metadata`
@@ -188,7 +198,7 @@ There are also additional parameters that may be useful, namely:
 - `pvDfPath` &mdash; path to p-value .tsv file (result.after_correction.scores.tsv)
 - `sampleMappingTxt` &mdash; path to sample_name_to_id.mapping.txt file
 - `anchorFile` &mdash; file of anchors to be plotted, one anchor per line
-- `satc_dump_file` &mdash; path to satc_dump utility file, within NOMAD /bin folder
+- `satc_dump_file` &mdash; path to satc_dump utility file, within SPLASH /bin folder
 - `skipSATC` &mdash; optional flag, if .satc files have already been dumped for this set of anchors into the output folder, and the desire is to regenerate plots.
 
 The generated output is 1 file per anchor, each file containing 4 subplots. At bottom left is the raw I x J contingency table, where each column is a sample, and each row represents a target (low abundance targets not shown). Each sample’s target counts are normalized by $n_j$ and plotted. At bottom right, the targets for the contingency table are displayed in a I x k table (corresponding to the rows of the contingency table, visually aligned). Each target of length k is shown, with basepairs colored as in the below colorbar. The target sequence for each row is displayed on the right. At top left,  the sample metadata is shown in a 1 x J table. Each entry corresponds to the column in the contingency table directly below it. The sample metadata is shown in a colorbar below. At top right, the column counts ($n_j$) are shown in a 1 x J table, where the colorbar below provides the scale. Again, columns are sorted as the contingency table.
@@ -197,9 +207,9 @@ The generated output is 1 file per anchor, each file containing 4 subplots. At b
 The script `c_analysis.ipynb` shows how the saved c vectors can be loaded in for further analysis. `--dump_Cjs` must be enabled for this.
  
 ## Biological interpretation and classification of anchors
-To facilitate downstream analysis of anchors, we provide a postprocessing script `NOMAD_extendor_classification.R`, that can be run on the anchors file generataed from the NOMAD run to classify anchors to biologically meaningful events such as alternative splicing, and base pair changes. `NOMAD_extendor_classification.R` needs the following inputs:
+To facilitate downstream analysis of anchors, we provide a postprocessing script `SPLASH_extendor_classification.R`, that can be run on the anchors file generataed from the SPLASH run to classify anchors to biologically meaningful events such as alternative splicing, and base pair changes. `SPLASH_extendor_classification.R` needs the following inputs:
 
-- `directory` &mdash; the output directory used for the NOMAD run
+- `directory` &mdash; the output directory used for the SPLASH run
 - `which_anchors_file` &mdash; flag to decide which anchor file (after correction or all anchors) to use, could be "after_correction" or "all" 
 - `effect_size_cutoff` &mdash; the effect size cutoff for significant anchors (default 0.2) 
 - `num_samples_cutoff` &mdash; the minimum number of sampels for an anchor to be called (default 20)
@@ -214,7 +224,7 @@ To facilitate downstream analysis of anchors, we provide a postprocessing script
 - `bowtie2_reference` &mdash; path to bowtie2 index for the reference genome
 - `paralogs_file` &mdash; path to file containing list of paralogous genes from reference genome
  
-The script will generate a file `classified_anchors.tsv` in the same directory used for NOMAD run, containing significant anchors along with their biological classification and alignment information.
+The script will generate a file `classified_anchors.tsv` in the same directory used for SPLASH run, containing significant anchors along with their biological classification and alignment information.
 
 ## Annotation files for human genome (based on T2T):
 The following three files are needed by the classification script for annotation anchors from human datasets and can be downloaded from the following links:
@@ -224,7 +234,7 @@ The following three files are needed by the classification script for annotation
 
 ## References
 Marek Kokot, Roozbeh Dehghannasiri, Tavor Baharav, Julia Salzman, and Sebastian Deorowicz.
-[NOMAD2 provides ultra-efficient, scalable, and unsupervised discovery on raw sequencing reads] (https://..) 
+[SPLASH2 provides ultra-efficient, scalable, and unsupervised discovery on raw sequencing reads] (https://..) 
 bioRxiv (2023)
  
 Kaitlin Chaung, Tavor Baharav,  Ivan Zheludev, Julia Salzman. [A statistical, reference-free algorithm subsumes myriad problems in genome science and enables novel discovery](https://doi.org/10.1101/2022.06.24.497555), bioRxiv (2022)
diff --git a/build_release.py b/build_release.py
index f8308fc..3978136 100755
--- a/build_release.py
+++ b/build_release.py
@@ -11,19 +11,21 @@ def replace_in_file(file_path, search_text, new_text):
             new_line = line.replace(search_text, new_text)
             print(new_line, end='')
 
-def get_ver(nomad_path):
-    with open(nomad_path) as f:
+def get_ver(splash_path):
+    with open(splash_path) as f:
         for line in f.readlines():
             line = line.strip()
-            if "NOMAD_VERSION" in line:
+            if "SPLASH_VERSION" in line:
                 return line.split("=")[-1].strip().split("\"")[1]
-    print("Error: cannot read NOMAD_VERSION")
+    print("Error: cannot read SPLASH_VERSION")
     sys.exit(1)
 
 def run_cmd(cmd):    
     p = subprocess.Popen(cmd, shell=True)
     p.communicate()
 
+run_cmd("git submodule init")
+run_cmd("git submodule update")
 run_cmd("make clean")
 run_cmd("make -j32 release")
 
@@ -37,12 +39,12 @@ def run_cmd(cmd):
 with open("bin/example/run-example.sh", "w") as f:
     f.write("#!/bin/bash\n")
     f.write("./download.py\n")
-    f.write("../nomad --bin_path .. input.txt\n")
+    f.write("../splash --bin_path .. input.txt\n")
 
 os.chmod("bin/example/run-example.sh", os.stat("bin/example/run-example.sh").st_mode | stat.S_IEXEC)
 
 
-ver = get_ver("bin/nomad")
+ver = get_ver("bin/splash")
 
-run_cmd(f"cd bin; tar -c * | pigz > ../nomad-{ver}.linux.x64.tar.gz; cd ..;")
+run_cmd(f"cd bin; tar -c * | pigz > ../splash-{ver}.linux.x64.tar.gz; cd ..;")
 run_cmd("rm -rf bin")
diff --git a/example/analysis_scripts/c_analysis.ipynb b/example/analysis_scripts/c_analysis.ipynb
index 005d70f..61fddab 100644
--- a/example/analysis_scripts/c_analysis.ipynb
+++ b/example/analysis_scripts/c_analysis.ipynb
@@ -30,7 +30,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "NOMAD2_result_folder=''"
+    "SPLASH2_result_folder=''"
    ]
   },
   {
@@ -41,7 +41,7 @@
    "outputs": [],
    "source": [
     "### read in p-values, afetr_correction.scores.csv file\n",
-    "fname=NOMAD2_result_folder+'/result.after_correction.scores.tsv'\n",
+    "fname=SPLASH2_result_folder+'/result.after_correction.scores.tsv'\n",
     "df = pd.read_csv(fname,sep='\\t')"
    ]
   },
@@ -95,7 +95,7 @@
     "dfArr = []\n",
     "\n",
     "##### path to intermediary_files output folder\n",
-    "fldrName= NOMAD2_result_folder+'/result_Cjs/'\n",
+    "fldrName= SPLASH2_result_folder+'/result_Cjs/'\n",
     "\n",
     "for fname in tqdm(glob.glob(fldrName+'bin*.cjs')):\n",
     "    dfArr.append(\n",
diff --git a/example/analysis_scripts/plotGeneration.py b/example/analysis_scripts/plotGeneration.py
index 60fb190..40d6fe3 100644
--- a/example/analysis_scripts/plotGeneration.py
+++ b/example/analysis_scripts/plotGeneration.py
@@ -66,7 +66,7 @@ def get_args():
     parser.add_argument(
         "--satc_dump_file",
         type=str,
-        help='Path to satc_dump utility file (within NOMAD2, /bin/satc_dump)'
+        help='Path to satc_dump utility file (within SPLASH2, /bin/satc_dump)'
     )
     
     args = parser.parse_args()
diff --git a/libs/refresh/deterministic_random.h b/libs/refresh/deterministic_random.h
new file mode 100644
index 0000000..6fa55f0
--- /dev/null
+++ b/libs/refresh/deterministic_random.h
@@ -0,0 +1,129 @@
+#pragma once
+#include <random>
+#include <tuple>
+#include <iostream>
+
+#undef min
+#undef max
+
+template<class IntType = int>
+class det_uniform_int_distribution {
+public:
+	// types
+	typedef IntType result_type;
+	typedef std::pair<int, int> param_type;
+
+	// constructors and reset functions
+	explicit det_uniform_int_distribution(IntType a = 0, IntType b = std::numeric_limits<IntType>::max());
+	explicit det_uniform_int_distribution(const param_type& parm);
+	void reset();
+
+	// generating functions
+	template<class URNG>
+	result_type operator()(URNG& g);
+	template<class URNG>
+	result_type operator()(URNG& g, const param_type& parm);
+
+	// property functions
+	result_type a() const;
+	result_type b() const;
+	param_type param() const;
+	void param(const param_type& parm);
+	result_type min() const;
+	result_type max() const;
+
+private:
+	typedef typename std::make_unsigned<IntType>::type diff_type;
+
+	IntType lower;
+	IntType upper;
+};
+
+template<class IntType>
+det_uniform_int_distribution<IntType>::det_uniform_int_distribution(IntType a, IntType b) {
+	lower = a;
+	upper = b;
+}
+
+template<class IntType>
+det_uniform_int_distribution<IntType>::det_uniform_int_distribution(const param_type& parm) {
+	param(parm);
+}
+
+template<class IntType>
+void det_uniform_int_distribution<IntType>::reset() {}
+
+template<class IntType>
+template<class URNG>
+auto det_uniform_int_distribution<IntType>::operator()(URNG& g) -> result_type {
+	return operator()(g, param());
+}
+
+template<class IntType>
+template<class URNG>
+auto det_uniform_int_distribution<IntType>::operator()(URNG& g, const param_type& parm) -> result_type {
+	diff_type diff = (diff_type)parm.second - (diff_type)parm.first + 1;
+	if (diff == 0) // If the +1 overflows we are using the full range, just return g()
+		return g();
+
+	diff_type badDistLimit = std::numeric_limits<diff_type>::max() / diff;
+	do {
+		diff_type generatedRand = g();
+
+		if (generatedRand / diff < badDistLimit)
+			return (IntType)((generatedRand % diff) + (diff_type)parm.first);
+	} while (true);
+}
+
+template<class IntType>
+auto det_uniform_int_distribution<IntType>::a() const -> result_type {
+	return lower;
+}
+
+template<class IntType>
+auto det_uniform_int_distribution<IntType>::b() const -> result_type {
+	return upper;
+}
+
+template<class IntType>
+auto det_uniform_int_distribution<IntType>::param() const -> param_type {
+	return param_type(lower, upper);
+}
+
+template<class IntType>
+void det_uniform_int_distribution<IntType>::param(const param_type& parm) {
+	std::tie(lower, upper) = parm;
+	if (upper < lower)
+		throw std::exception();
+}
+
+template<class IntType>
+auto det_uniform_int_distribution<IntType>::min() const -> result_type {
+	return lower;
+}
+
+template<class IntType>
+auto det_uniform_int_distribution<IntType>::max() const -> result_type {
+	return upper;
+};
+
+
+
+
+template<class RandomIt, class URBG>
+void partial_shuffle(RandomIt first, RandomIt middle, RandomIt last, URBG&& g)
+{
+	typedef typename std::iterator_traits<RandomIt>::difference_type diff_t;
+	typedef det_uniform_int_distribution<diff_t> distr_t;
+	typedef typename distr_t::param_type param_t;
+
+	distr_t D;
+	diff_t n = middle - first;
+	diff_t N = last - first - 1;
+	for (diff_t i =0; i < n; ++i) {
+		using std::swap;
+		swap(first[i], first[D(g, param_t(i, N))]);
+	}
+}
+
+
diff --git a/libs/refresh/parallel-queues.h b/libs/refresh/parallel-queues.h
index 9e250da..19158b0 100644
--- a/libs/refresh/parallel-queues.h
+++ b/libs/refresh/parallel-queues.h
@@ -187,6 +187,11 @@ namespace refresh {
 			}
 		}
 
+		bool check_completed() {
+			std::lock_guard<std::mutex> lck(mtx);
+			return is_completed;
+		}
+		
 		void cancel()
 		{
 			std::lock_guard<std::mutex> lck(mtx);
diff --git a/src/common/artifacts_filter.h b/src/common/artifacts_filter.h
new file mode 100644
index 0000000..4f894da
--- /dev/null
+++ b/src/common/artifacts_filter.h
@@ -0,0 +1,75 @@
+#ifndef _ARTIFACTS_FILTER_H
+#define _ARTIFACTS_FILTER_H
+#include "../common/satc_data.h"
+#include <unordered_set>
+#include <map>
+
+class ArtifactsFilter
+{
+	std::map<uint32_t, std::unordered_set<uint64_t>> artifacts; //index is artifact len
+public:
+	ArtifactsFilter(const std::string& path) //empty path means no filtering
+	{
+		if (path == "")
+			return;
+
+		std::ifstream in(path);
+		
+		if (!in)
+		{
+			std::cerr << "Error: cannot open file " << path << "\n";
+			exit(1);
+		}
+		std::string artifact;
+
+		while (in >> artifact)
+		{
+			auto len = artifact.length();
+			assert(len <= 32);
+			artifacts[len].insert(str_kmer_to_uint64_t(artifact));
+		}
+	}
+	ArtifactsFilter() :
+		ArtifactsFilter("")
+	{
+
+	}
+
+	void Add(uint32_t len, const std::vector<uint64_t>& new_artifacts) {
+		auto& art = artifacts[len];
+		for (auto x : new_artifacts)
+			art.insert(x);
+	}
+
+	bool ContainsArtifact(uint64_t anchor, uint32_t len) const
+	{
+		for (const auto& x : artifacts)
+		{
+			uint32_t art_len = x.first;
+
+			//skip too long artifacts
+			if (art_len > len)
+				continue;
+
+			const auto& art = x.second;
+			
+			uint64_t mask = ((1ull) << (2 * art_len)) - 1;
+
+			auto no_parts = len - art_len + 1;
+			uint64_t check = anchor;
+			for (uint32_t i = 0; i < no_parts - 1; ++i)
+			{
+				if (art.count(check & mask))
+					return true;
+				check >>= 2;
+			}
+			//last one
+			if (art.count(check & mask))
+				return true;
+		}
+		return false;
+	}
+};
+
+
+#endif 
diff --git a/src/common/hamming_filter.h b/src/common/hamming_filter.h
new file mode 100644
index 0000000..6a2635e
--- /dev/null
+++ b/src/common/hamming_filter.h
@@ -0,0 +1,73 @@
+#ifndef _HAMMING_FILTER_H
+#define _HAMMING_FILTER_H
+#include <cinttypes>
+#include <vector>
+#include "target_count.h"
+
+
+class HammingFilter
+{
+    uint32_t min_hamming_threshold;
+
+	int calculateHamming(uint64_t a, uint64_t b) const {
+		
+		// generate mask with even bits
+		static uint64_t MASK_EVEN_BITS = []() {
+			uint64_t v = 0;
+			for (int i = 0; i < 32; ++i) {
+				v <<= 2;
+				v |= 1ULL;
+			}
+			return v;
+		}();
+
+		//a &= ~HOMOPOLYMER_MASK;
+		//b &= ~HOMOPOLYMER_MASK;
+
+		uint64_t diffs_even = (a ^ b) & MASK_EVEN_BITS;
+		uint64_t diffs_odd = ((a >> 1) ^ (b >> 1)) & MASK_EVEN_BITS;
+
+		uint64_t diffs = diffs_even | diffs_odd;
+		#ifdef _WIN32
+		int cnt = __popcnt64(diffs);
+		#else
+		int cnt = __builtin_popcountll(diffs);
+		#endif
+		return cnt;
+	}
+
+public:
+    HammingFilter(uint32_t min_hamming_threshold)
+        : min_hamming_threshold(min_hamming_threshold)
+    {
+
+    }
+    HammingFilter() :
+        HammingFilter(0)
+    {
+
+    }
+
+    bool ContainsDistantPair(const std::vector<TargetCount>& targets) const
+    {
+        if (min_hamming_threshold == 0)
+            return true;
+
+        if (targets.size() < 2)
+            return false;
+
+        for (size_t i = 0; i < targets.size(); ++i)
+        {
+            for (size_t j = i+1; j < targets.size(); ++j)
+            {
+                uint64_t target_i = targets[i].target;
+                uint64_t target_j = targets[j].target;
+                if (static_cast<uint32_t>(calculateHamming(target_i, target_j)) >= min_hamming_threshold)
+                    return true;
+            }
+        }
+
+        return false;
+    }
+};
+#endif
diff --git a/src/common/illumina_adapters_static.cpp b/src/common/illumina_adapters_static.cpp
new file mode 100644
index 0000000..cf5dfd5
--- /dev/null
+++ b/src/common/illumina_adapters_static.cpp
@@ -0,0 +1,778 @@
+#include "illumina_adapters_static.h"
+
+std::vector<uint64_t> IlluminaAdaptersStatic::Get12Mers() {
+	return {
+        143048, // AAAGAGTGTAGA
+        368117, // AACCGCTCTTCC
+        411825, // AACGCAGAGTAC
+        479953, // AACTCCAGTCAC
+        551217, // AAGACGGCATAC
+        561435, // AAGAGCACACGT
+        563956, // AAGAGCGGTTCA
+        564078, // AAGAGCGTCGTG
+        564659, // AAGAGCTCGTAT
+        572195, // AAGAGTGTAGAT
+        598150, // AAGCAGAAGACG
+        601779, // AAGCAGTGGTAT
+        930214, // AATGATACGGCG
+        940165, // AATGCCGAGACC
+       1013422, // AATTCTCGGGTG
+       1128280, // ACACATCTCCGA
+       1128326, // ACACATCTGACG
+       1148535, // ACACGACGCTCT
+       1161089, // ACACGTCTGAAC
+       1171413, // ACACTCTTTCCC
+       1275273, // ACATCTCCGAGC
+       1276007, // ACATCTGACGCT
+       1333389, // ACCACCGAGATC
+       1450865, // ACCGAGATCTAC
+       1455980, // ACCGATCTCGTA
+       1472470, // ACCGCTCTTCCG
+       1599357, // ACGACGCTCTTC
+       1609133, // ACGAGATCGGTC
+       1647300, // ACGCAGAGTACA
+       1695576, // ACGCTCTTCCGA
+       1697158, // ACGCTGCCGACG
+       1723490, // ACGGCATACGAG
+       1728837, // ACGGCGACCACC
+       1800221, // ACGTCTGAACTC
+       1817055, // ACGTGTGCTCTT
+       1959672, // ACTCTGCGTTGA
+       1965404, // ACTCTTTCCCTA
+       2001874, // ACTGGAGTTCAG
+       2131603, // AGAAGACGGCAT
+       2188150, // AGACCGATCTCG
+       2204870, // AGACGGCATACG
+       2210717, // AGACGTGTGCTC
+       2245741, // AGAGCACACGTC
+       2255826, // AGAGCGGTTCAG
+       2256315, // AGAGCGTCGTGT
+       2258638, // AGAGCTCGTATG
+       2278634, // AGAGTACATGGG
+       2288781, // AGAGTGTAGATC
+       2320418, // AGATCGGAAGAG
+       2321270, // AGATCGGTCTCG
+       2322717, // AGATCTACACTC
+       2324154, // AGATCTCGGTGG
+       2341680, // AGATGTGTATAA
+       2377438, // AGCACACGTCTG
+       2392602, // AGCAGAAGACGG
+       2400485, // AGCAGGAATGCC
+       2407117, // AGCAGTGGTATC
+       2442785, // AGCCCACGAGAC
+       2538788, // AGCGGTTCAGCA
+       2544187, // AGCGTCAGATGT
+       2546610, // AGCGTCGTGTAG
+       2583781, // AGCTCGTATGCC
+       2636130, // AGGAATGCCGAG
+       2753070, // AGGGAAAGAGTG
+       3060545, // AGTGGTATCAAC
+       3066077, // AGTGTAGATCTC
+       3098734, // AGTTCAGACGTG
+       3216245, // ATACACATCTCC
+       3216248, // ATACACATCTGA
+       3229599, // ATACCACTGCTT
+       3246298, // ATACGAGATCGG
+       3253780, // ATACGGCGACCA
+       3414306, // ATCAACGCAGAG
+       3572260, // ATCGGAAGAGCA
+       3572262, // ATCGGAAGAGCG
+       3572263, // ATCGGAAGAGCT
+       3585897, // ATCGGTCTCGGC
+       3609055, // ATCTACACTCTT
+       3627157, // ATCTCCGAGCCC
+       3632045, // ATCTCGGTGGTC
+       3632357, // ATCTCGTATGCC
+       3638905, // ATCTGACGCTGC
+       3720856, // ATGATACGGCGA
+       3760662, // ATGCCGAGACCG
+       3763703, // ATGCCGTCTTCT
+       3874278, // ATGTACTCTGCG
+       3912456, // ATGTGTATAAGA
+       4028896, // ATTCCTGCTGAA
+       4053689, // ATTCTCGGGTGC
+       4297260, // CAACGCAGAGTA
+       4343841, // CAAGCAGAAGAC
+       4484576, // CACACGTCTGAA
+       4513122, // CACATCTCCGAG
+       4513305, // CACATCTGACGC
+       4557020, // CACCGAGATCTA
+       4594143, // CACGACGCTCTT
+       4644359, // CACGTCTGAACT
+       4685655, // CACTCTTTCCCT
+       4727204, // CAGAAGACGGCA
+       4746983, // CAGACGTGTGCT
+       4763962, // CAGAGTACATGG
+       4779724, // CAGATGTGTATA
+       4794425, // CAGCAGGAATGC
+       4830350, // CAGCGTCAGATG
+       4853336, // CAGGAATGCCGA
+       4959440, // CAGTGGTATCAA
+       5005878, // CATACGAGATCG
+       5101093, // CATCTCCGAGCC
+       5104030, // CATCTGACGCTG
+       5162873, // CATGTACTCTGC
+       5201528, // CATTCCTGCTGA
+       5333559, // CCACCGAGATCT
+       5451904, // CCATATAAGAAA
+       5485022, // CCATGTACTCTG
+       5557280, // CCCATATAAGAA
+       5565559, // CCCATGTACTCT
+       5706118, // CCCTACACGACG
+       5801357, // CCGAGACCGATC
+       5803460, // CCGAGATCTACA
+       5805336, // CCGAGCCCACGA
+       5823923, // CCGATCTCGTAT
+       5889880, // CCGCTCTTCCGA
+       5996007, // CCGTCTTCTGCT
+       6047257, // CCTACACGACGC
+       6191126, // CCTGCTGAACCG
+       6374792, // CGACCACCGAGA
+       6397429, // CGACGCTCTTCC
+       6428215, // CGAGACCGATCT
+       6436535, // CGAGATCGGTCT
+       6436625, // CGAGATCTACAC
+       6444130, // CGAGCCCACGAG
+       6518478, // CGATCTCGTATG
+       6589203, // CGCAGAGTACAT
+       6665039, // CGCCGTATCATT
+       6782307, // CGCTCTTCCGAT
+       6788632, // CGCTGCCGACGA
+       6824516, // CGGAAGAGCACA
+       6824555, // CGGAAGAGCGGT
+       6824557, // CGGAAGAGCGTC
+       6824566, // CGGAAGAGCTCG
+       6852332, // CGGAGATGTGTA
+       6891218, // CGGCAGCGTCAG
+       6893960, // CGGCATACGAGA
+       6897017, // CGGCATTCCTGC
+       6915350, // CGGCGACCACCG
+       7005450, // CGGGTGCCAAGG
+       7042707, // CGGTCTCGGCAT
+       7056790, // CGGTGGTCGCCG
+       7066186, // CGGTTCAGCAGG
+       7136695, // CGTATGCCGTCT
+       7152571, // CGTCAGATGTGT
+       7185563, // CGTCGGCAGCGT
+       7191338, // CGTCGTGTAGGG
+       7200885, // CGTCTGAACTCC
+       7206815, // CGTCTTCTGCTT
+       7251816, // CGTGGGCTCGGA
+       7260800, // CGTGTAGGGAAA
+       7268221, // CGTGTGCTCTTC
+       7310417, // CGTTGATACCAC
+       7411815, // CTACACGACGCT
+       7413245, // CTACACTCTTTC
+       7702865, // CTCCGAGCCCAC
+       7768302, // CTCGGAGATGTG
+       7771095, // CTCGGCATTCCT
+       7777872, // CTCGGGTGCCAA
+       7781081, // CTCGGTGGTCGC
+       7786075, // CTCGTATGCCGT
+       7793270, // CTCGTGGGCTCG
+       7838691, // CTCTGCGTTGAT
+       7851076, // CTCTTATACACA
+       7853623, // CTCTTCCGATCT
+       7861617, // CTCTTTCCCTAC
+       7870071, // CTGAACCGCTCT
+       7871819, // CTGAACTCCAGT
+       7890838, // CTGACGCTGCCG
+       7978545, // CTGCGTTGATAC
+       7987289, // CTGCTGAACCGC
+       8007496, // CTGGAGTTCAGA
+       8091596, // CTGTCTCTTATA
+       8176717, // CTTATACACATC
+       8345361, // CTTTCCCTACAC
+       8424370, // GAAAGAGTGTAG
+       8480637, // GAACCGCTCTTC
+       8508596, // GAACTCCAGTCA
+       8526412, // GAAGACGGCATA
+       8528966, // GAAGAGCACACG
+       8529597, // GAAGAGCGGTTC
+       8529627, // GAAGAGCGTCGT
+       8529772, // GAAGAGCTCGTA
+       8623649, // GAATGCCGAGAC
+       8641963, // GAATTCTCGGGT
+       8721955, // GACCACCGAGAT
+       8752603, // GACCGATCTCGT
+       8812502, // GACGCTCTTCCG
+       8812897, // GACGCTGCCGAC
+       8819480, // GACGGCATACGA
+       8842871, // GACGTGTGCTCT
+       8889076, // GACTGGAGTTCA
+       8935645, // GAGACCGATCTC
+       8968925, // GAGATCGGTCTC
+       8969287, // GAGATCTACACT
+       8974028, // GAGATGTGTATA
+       8982967, // GAGCACACGTCT
+       8999304, // GAGCCCACGAGA
+       9023305, // GAGCGGTTCAGC
+       9025260, // GAGCGTCGTGTA
+       9034553, // GAGCTCGTATGC
+       9155127, // GAGTGTAGATCT
+       9163291, // GAGTTCAGACGT
+       9196007, // GATACCACTGCT
+       9202053, // GATACGGCGACC
+       9281673, // GATCGGAAGAGC
+       9285082, // GATCGGTCTCGG
+       9290871, // GATCTACACTCT
+       9296619, // GATCTCGGTGGT
+       9296697, // GATCTCGTATGC
+       9366722, // GATGTGTATAAG
+       9509752, // GCACACGTCTGA
+       9570409, // GCAGAAGACGGC
+       9579598, // GCAGAGTACATG
+       9596195, // GCAGCGTCAGAT
+       9601942, // GCAGGAATGCCG
+       9628468, // GCAGTGGTATCA
+       9640077, // GCATACGAGATC
+       9688990, // GCATTCCTGCTG
+       9838947, // GCCGAGACCGAT
+       9887609, // GCCGTCTTCTGC
+       9982306, // GCGACCACCGAG
+      10155154, // GCGGTTCAGCAG
+      10176750, // GCGTCAGATGTG
+      10186442, // GCGTCGTGTAGG
+      10216212, // GCGTTGATACCA
+      10330683, // GCTCGGAGATGT
+      10335126, // GCTCGTATGCCG
+      10352013, // GCTCTTCCGATC
+      10356125, // GCTGAACCGCTC
+      10494700, // GGAAAGAGTGTA
+      10520849, // GGAAGAGCACAC
+      10521007, // GGAAGAGCGGTT
+      10521014, // GGAAGAGCGTCG
+      10521051, // GGAAGAGCTCGT
+      10544520, // GGAATGCCGAGA
+      10549098, // GGAATTCTCGGG
+      10632115, // GGAGATGTGTAT
+      10679430, // GGAGTTCAGACG
+      10787656, // GGCAGCGTCAGA
+      10798627, // GGCATACGAGAT
+      10810855, // GGCATTCCTGCT
+      10884184, // GGCGACCACCGA
+      10971278, // GGCTCGGAGATG
+      11012283, // GGGAAAGAGTGT
+      11131427, // GGGCTCGGAGAT
+      11325540, // GGTATCAACGCA
+      11376333, // GGTCGCCGTATC
+      11393615, // GGTCTCGGCATT
+      11449947, // GGTGGTCGCCGT
+      11487528, // GGTTCAGCAGGA
+      11656815, // GTACTCTGCGTT
+      11679595, // GTAGATCTCGGT
+      11706402, // GTAGGGAAAGAG
+      11733124, // GTATAAGAGACA
+      11747730, // GTATCAACGCAG
+      11769567, // GTATGCCGTCTT
+      11833068, // GTCAGATGTGTA
+      11950900, // GTCGCCGTATCA
+      11965037, // GTCGGCAGCGTC
+      11988136, // GTCGTGTAGGGA
+      12020029, // GTCTCGGCATTC
+      12021415, // GTCTCGTGGGCT
+      12025028, // GTCTCTTATACA
+      12026324, // GTCTGAACTCCA
+      12050046, // GTCTTCTGCTTG
+      12089903, // GTGACTGGAGTT
+      12181336, // GTGCTCTTCCGA
+      12230050, // GTGGGCTCGGAG
+      12242182, // GTGGTATCAACG
+      12245356, // GTGGTCGCCGTA
+      12264310, // GTGTAGATCTCG
+      12265986, // GTGTAGGGAAAG
+      12267656, // GTGTATAAGAGA
+      12295669, // GTGTGCTCTTCC
+      12394939, // GTTCAGACGTGT
+      12395680, // GTTCAGCAGGAA
+      12464455, // GTTGATACCACT
+      12815465, // TAATGATACGGC
+      12864982, // TACACATCTCCG
+      12864993, // TACACATCTGAC
+      12870045, // TACACGACGCTC
+      12875765, // TACACTCTTTCC
+      12985195, // TACGAGATCGGT
+      13015121, // TACGGCGACCAC
+      13072830, // TACTCTGCGTTG
+      13163950, // TAGATCTCGGTG
+      13271179, // TAGGGAAAGAGT
+      13378066, // TATAAGAGACAG
+      13386973, // TATACACATCTC
+      13386974, // TATACACATCTG
+      13436488, // TATCAACGCAGA
+      13523837, // TATGCCGTCTTC
+      13657227, // TCAACGCAGAGT
+      13668872, // TCAAGCAGAAGA
+      13769657, // TCAGACGTGTGC
+      13777843, // TCAGATGTGTAT
+      13781518, // TCAGCAGGAATG
+      14009441, // TCCCTACACGAC
+      14034246, // TCCGAGCCCACG
+      14130693, // TCCTGCTGAACC
+      14249171, // TCGCCGTATCAT
+      14289041, // TCGGAAGAGCAC
+      14289050, // TCGGAAGAGCGG
+      14289051, // TCGGAAGAGCGT
+      14289053, // TCGGAAGAGCTC
+      14295995, // TCGGAGATGTGT
+      14305716, // TCGGCAGCGTCA
+      14307166, // TCGGCATTCCTG
+      14334274, // TCGGGTGCCAAG
+      14343588, // TCGGTCTCGGCA
+      14347109, // TCGGTGGTCGCC
+      14367085, // TCGTATGCCGTC
+      14379302, // TCGTCGGCAGCG
+      14395866, // TCGTGGGCTCGG
+      14398112, // TCGTGTAGGGAA
+      14436223, // TCTACACTCTTT
+      14508628, // TCTCCGAGCCCA
+      14525685, // TCTCGGCATTCC
+      14527380, // TCTCGGGTGCCA
+      14528182, // TCTCGGTGGTCG
+      14529430, // TCTCGTATGCCG
+      14531229, // TCTCGTGGGCTC
+      14545681, // TCTCTTATACAC
+      14550866, // TCTGAACTCCAG
+      14555621, // TCTGACGCTGCC
+      14577548, // TCTGCGTTGATA
+      14627091, // TCTTATACACAT
+      14669252, // TCTTTCCCTACA
+      14703071, // TGAACCGCTCTT
+      14710061, // TGAACTCCAGTC
+      14786136, // TGACGCTGCCGA
+      14805181, // TGACTGGAGTTC
+      14881913, // TGATACCACTGC
+      14883425, // TGATACGGCGAC
+      15042648, // TGCCGAGACCGA
+      15054814, // TGCCGTCTTCTG
+      15136965, // TGCGTTGATACC
+      15170915, // TGCTCTTCCGAT
+      15171943, // TGCTGAACCGCT
+      15220186, // TGGAATTCTCGG
+      15252769, // TGGAGTTCAGAC
+      15365768, // TGGGCTCGGAGA
+      15414297, // TGGTATCAACGC
+      15426995, // TGGTCGCCGTAT
+      15497115, // TGTACTCTGCGT
+      15502810, // TGTAGATCTCGG
+      15509512, // TGTAGGGAAAGA
+      15516193, // TGTATAAGAGAC
+      15589169, // TGTCTCTTATAC
+      15628246, // TGTGCTCTTCCG
+      15649826, // TGTGTATAAGAG
+      15786778, // TTAATGATACGG
+      15929655, // TTATACACATCT
+      16000130, // TTCAAGCAGAAG
+      16025326, // TTCAGACGTGTG
+      16028291, // TTCAGCAGGAAT
+      16085272, // TTCCCTACACGA
+      16115585, // TTCCTGCTGAAC
+      16214757, // TTCTCGGGTGCC
+      16239850, // TTCTTATATGGG
+      16303390, // TTGATACCACTG
+      16529606, // TTTAATGATACG
+      16582944, // TTTCAAGCAGAA
+      16604230, // TTTCCCTACACG
+      16642874, // TTTCTTATATGG
+      16715313, // TTTTAATGATAC
+      16728648, // TTTTCAAGCAGA
+      16761740, // TTTTTAATGATA
+      16765074, // TTTTTCAAGCAG
+      16773347, // TTTTTTAATGAT
+      16774180, // TTTTTTCAAGCA
+      16776248, // TTTTTTTAATGA
+      16776457, // TTTTTTTCAAGC
+      16776974, // TTTTTTTTAATG
+      16777026, // TTTTTTTTCAAG
+      16777155, // TTTTTTTTTAAT
+      16777168, // TTTTTTTTTCAA
+      16777200, // TTTTTTTTTTAA
+      16777204, // TTTTTTTTTTCA
+      14436223, // TCTACACTCTTT
+      10521007, // GGAAGAGCGGTT
+      11656815, // GTACTCTGCGTT
+      12089903, // GTGACTGGAGTT
+      11769567, // GTATGCCGTCTT
+       1817055, // ACGTGTGCTCTT
+      14703071, // TGAACCGCTCTT
+       4594143, // CACGACGCTCTT
+       3246559, // ATACGAGCTCTT
+       3609055, // ATCTACACTCTT
+       7206815, // CGTCTTCTGCTT
+       3229599, // ATACCACTGCTT
+       6665039, // CGCCGTATCATT
+      11393615, // GGTCTCGGCATT
+       4547087, // CACCCGAGAATT
+      14295995, // TCGGAGATGTGT
+       7152571, // CGTCAGATGTGT
+       2256315, // AGAGCGTCGTGT
+      12394939, // GTTCAGACGTGT
+      11012283, // GGGAAAGAGTGT
+      10330683, // GCTCGGAGATGT
+       2544187, // AGCGTCAGATGT
+       9296619, // GATCTCGGTGGT
+      11679595, // GTAGATCTCGGT
+      12985195, // TACGAGATCGGT
+       6824555, // CGGAAGAGCGGT
+       8529627, // GAAGAGCGTCGT
+       8752603, // GACCGATCTCGT
+      15497115, // TGTACTCTGCGT
+      14289051, // TCGGAAGAGCGT
+       7185563, // CGTCGGCAGCGT
+       7786075, // CTCGTATGCCGT
+      11449947, // GGTGGTCGCCGT
+       9163291, // GAGTTCAGACGT
+        561435, // AAGAGCACACGT
+      13657227, // TCAACGCAGAGT
+      13271179, // TAGGGAAAGAGT
+       7871819, // CTGAACTCCAGT
+       3763703, // ATGCCGTCTTCT
+       6436535, // CGAGATCGGTCT
+       7136695, // CGTATGCCGTCT
+       8982967, // GAGCACACGTCT
+       8842871, // GACGTGTGCTCT
+       7870071, // CTGAACCGCTCT
+       1148535, // ACACGACGCTCT
+       5005943, // CATACGAGCTCT
+       5565559, // CCCATGTACTCT
+       9290871, // GATCTACACTCT
+       7853623, // CTCTTCCGATCT
+       6428215, // CGAGACCGATCT
+       9155127, // GAGTGTAGATCT
+       5333559, // CCACCGAGATCT
+      15929655, // TTATACACATCT
+       4746983, // CAGACGTGTGCT
+       5996007, // CCGTCTTCTGCT
+      10810855, // GGCATTCCTGCT
+       9196007, // GATACCACTGCT
+      12021415, // GTCTCGTGGGCT
+      15171943, // TGCTGAACCGCT
+       1276007, // ACATCTGACGCT
+       7411815, // CTACACGACGCT
+      10798631, // GGCATACGAGCT
+       7771095, // CTCGGCATTCCT
+       4685655, // CACTCTTTCCCT
+      12464455, // GTTGATACCACT
+       8969287, // GAGATCTACACT
+       4644359, // CACGTCTGAACT
+      10632115, // GGAGATGTGTAT
+      13777843, // TCAGATGTGTAT
+        601779, // AAGCAGTGGTAT
+       5823923, // CCGATCTCGTAT
+      15426995, // TGGTCGCCGTAT
+       7838691, // CTCTGCGTTGAT
+      15170915, // TGCTCTTCCGAT
+       6782307, // CGCTCTTCCGAT
+       2588003, // AGCTCTTCCGAT
+       9838947, // GCCGAGACCGAT
+        572195, // AAGAGTGTAGAT
+      11131427, // GGGCTCGGAGAT
+       8721955, // GACCACCGAGAT
+      10798627, // GGCATACGAGAT
+       9596195, // GCAGCGTCAGAT
+      14249171, // TCGCCGTATCAT
+       7042707, // CGGTCTCGGCAT
+       2131603, // AGAAGACGGCAT
+       6589203, // CGCAGAGTACAT
+      14627091, // TCTTATACACAT
+      16028291, // TTCAGCAGGAAT
+       9525379, // GCACCCGAGAAT
+      13072830, // TACTCTGCGTTG
+      12050046, // GTCTTCTGCTTG
+      16025326, // TTCAGACGTGTG
+       7768302, // CTCGGAGATGTG
+      10176750, // GCGTCAGATGTG
+      13163950, // TAGATCTCGGTG
+        564078, // AAGAGCGTCGTG
+       3098734, // AGTTCAGACGTG
+       2753070, // AGGGAAAGAGTG
+      15054814, // TGCCGTCTTCTG
+       2377438, // AGCACACGTCTG
+       5485022, // CCATGTACTCTG
+      13386974, // TATACACATCTG
+       9688990, // GCATTCCTGCTG
+       5104030, // CATCTGACGCTG
+      14307166, // TCGGCATTCCTG
+      16303390, // TTGATACCACTG
+       6518478, // CGATCTCGTATG
+      10971278, // GGCTCGGAGATG
+       4830350, // CAGCGTCAGATG
+       9579598, // GCAGAGTACATG
+      13781518, // TCAGCAGGAATG
+       2324154, // AGATCTCGGTGG
+      16642874, // TTTCTTATATGG
+       4763962, // CAGAGTACATGG
+      16239850, // TTCTTATATGGG
+       2278634, // AGAGTACATGGG
+       7191338, // CGTCGTGTAGGG
+       9285082, // GATCGGTCTCGG
+      15502810, // TGTAGATCTCGG
+      14395866, // TCGTGGGCTCGG
+       3246298, // ATACGAGATCGG
+      14289050, // TCGGAAGAGCGG
+       2392602, // AGCAGAAGACGG
+      10186442, // GCGTCGTGTAGG
+       7066186, // CGGTTCAGCAGG
+      14528182, // TCTCGGTGGTCG
+      10521014, // GGAAGAGCGTCG
+       2321270, // AGATCGGTCTCG
+       2188150, // AGACCGATCTCG
+      12264310, // GTGTAGATCTCG
+       7793270, // CTCGTGGGCTCG
+       5005878, // CATACGAGATCG
+       3874278, // ATGTACTCTGCG
+        930214, // AATGATACGGCG
+       3572262, // ATCGGAAGAGCG
+      14379302, // TCGTCGGCAGCG
+      15628246, // TGTGCTCTTCCG
+       1472470, // ACCGCTCTTCCG
+       8812502, // GACGCTCTTCCG
+       6453206, // CGAGCTCTTCCG
+      12864982, // TACACATCTCCG
+       7890838, // CTGACGCTGCCG
+      14529430, // TCTCGTATGCCG
+       9601942, // GCAGGAATGCCG
+       7056790, // CGGTGGTCGCCG
+       6267990, // CCTTGGCACCCG
+       3760662, // ATGCCGAGACCG
+       6915350, // CGGCGACCACCG
+       6191126, // CCTGCTGAACCG
+       2204870, // AGACGGCATACG
+       1128326, // ACACATCTGACG
+       1697158, // ACGCTGCCGACG
+       5706118, // CCCTACACGACG
+      10679430, // GGAGTTCAGACG
+        598150, // AAGCAGAAGACG
+      14034246, // TCCGAGCCCACG
+      16604230, // TTTCCCTACACG
+       8528966, // GAAGAGCACACG
+      12242182, // GTGGTATCAACG
+       2546610, // AGCGTCGTGTAG
+       8424370, // GAAAGAGTGTAG
+      12230050, // GTGGGCTCGGAG
+       4513122, // CACATCTCCGAG
+       2636130, // AGGAATGCCGAG
+      16401762, // TTGGCACCCGAG
+       9982306, // GCGACCACCGAG
+       1723490, // ACGGCATACGAG
+       6444130, // CGAGCCCACGAG
+       3414306, // ATCAACGCAGAG
+      15649826, // TGTGTATAAGAG
+       2320418, // AGATCGGAAGAG
+      11706402, // GTAGGGAAAGAG
+       2255826, // AGAGCGGTTCAG
+       2001874, // ACTGGAGTTCAG
+       6891218, // CGGCAGCGTCAG
+      11747730, // GTATCAACGCAG
+      10155154, // GCGGTTCAGCAG
+      14550866, // TCTGAACTCCAG
+      13378066, // TATAAGAGACAG
+       9366722, // GATGTGTATAAG
+      12265986, // GTGTAGGGAAAG
+       7413245, // CTACACTCTTTC
+       8529597, // GAAGAGCGGTTC
+      14805181, // TGACTGGAGTTC
+      13523837, // TATGCCGTCTTC
+       7268221, // CGTGTGCTCTTC
+       8480637, // GAACCGCTCTTC
+       1599357, // ACGACGCTCTTC
+      12986237, // TACGAGCTCTTC
+      12020029, // GTCTCGGCATTC
+       1411133, // ACCCGAGAATTC
+       3632045, // ATCTCGGTGGTC
+       1609133, // ACGAGATCGGTC
+       6824557, // CGGAAGAGCGTC
+      11965037, // GTCGGCAGCGTC
+      14367085, // TCGTATGCCGTC
+       2245741, // AGAGCACACGTC
+      14710061, // TGAACTCCAGTC
+       8968925, // GAGATCGGTCTC
+       8935645, // GAGACCGATCTC
+       3066077, // AGTGTAGATCTC
+      13386973, // TATACACATCTC
+       2210717, // AGACGTGTGCTC
+      14531229, // TCTCGTGGGCTC
+      10356125, // GCTGAACCGCTC
+      12870045, // TACACGACGCTC
+       9640093, // GCATACGAGCTC
+       2322717, // AGATCTACACTC
+       1800221, // ACGTCTGAACTC
+       2407117, // AGCAGTGGTATC
+      11376333, // GGTCGCCGTATC
+      10352013, // GCTCTTCCGATC
+       5801357, // CCGAGACCGATC
+       2288781, // AGAGTGTAGATC
+       1333389, // ACCACCGAGATC
+       9640077, // GCATACGAGATC
+       8176717, // CTTATACACATC
+      13769657, // TCAGACGTGTGC
+       9887609, // GCCGTCTTCTGC
+       5162873, // CATGTACTCTGC
+       3638905, // ATCTGACGCTGC
+       6897017, // CGGCATTCCTGC
+      14881913, // TGATACCACTGC
+       9296697, // GATCTCGTATGC
+       4794425, // CAGCAGGAATGC
+       3585897, // ATCGGTCTCGGC
+       9570409, // GCAGAAGACGGC
+       7781081, // CTCGGTGGTCGC
+       7987289, // CTGCTGAACCGC
+       4513305, // CACATCTGACGC
+       6047257, // CCTACACGACGC
+      15414297, // TGGTATCAACGC
+       1275273, // ACATCTCCGAGC
+       6893961, // CGGCATACGAGC
+       9281673, // GATCGGAAGAGC
+       9023305, // GAGCGGTTCAGC
+      12875765, // TACACTCTTTCC
+      12295669, // GTGTGCTCTTCC
+        368117, // AACCGCTCTTCC
+       6397429, // CGACGCTCTTCC
+       1613301, // ACGAGCTCTTCC
+      14525685, // TCTCGGCATTCC
+       5644533, // CCCGAGAATTCC
+       3216245, // ATACACATCTCC
+       7200885, // CGTCTGAACTCC
+      14555621, // TCTGACGCTGCC
+       3632357, // ATCTCGTATGCC
+       2400485, // AGCAGGAATGCC
+      14347109, // TCGGTGGTCGCC
+       5101093, // CATCTCCGAGCC
+       1171413, // ACACTCTTTCCC
+       3627157, // ATCTCCGAGCCC
+      15136965, // TGCGTTGATACC
+       9202053, // GATACGGCGACC
+        940165, // AATGCCGAGACC
+       1728837, // ACGGCGACCACC
+      14130693, // TCCTGCTGAACC
+        411825, // AACGCAGAGTAC
+       1450865, // ACCGAGATCTAC
+       7861617, // CTCTTTCCCTAC
+      15589169, // TGTCTCTTATAC
+       7978545, // CTGCGTTGATAC
+        551217, // AAGACGGCATAC
+      12864993, // TACACATCTGAC
+      14883425, // TGATACGGCGAC
+       8812897, // GACGCTGCCGAC
+      14009441, // TCCCTACACGAC
+       8623649, // GAATGCCGAGAC
+       2442785, // AGCCCACGAGAC
+      15516193, // TGTATAAGAGAC
+      15252769, // TGGAGTTCAGAC
+       4343841, // CAAGCAGAAGAC
+        479953, // AACTCCAGTCAC
+      14289041, // TCGGAAGAGCAC
+       7702865, // CTCCGAGCCCAC
+       7310417, // CGTTGATACCAC
+      13015121, // TACGGCGACCAC
+       6436625, // CGAGATCTACAC
+       8345361, // CTTTCCCTACAC
+      14545681, // TCTCTTATACAC
+      10520849, // GGAAGAGCACAC
+       1161089, // ACACGTCTGAAC
+      16115585, // TTCCTGCTGAAC
+       3060545, // AGTGGTATCAAC
+       9882940, // GCCGTATCATTA
+       6852332, // CGGAGATGTGTA
+      11833068, // GTCAGATGTGTA
+       9025260, // GAGCGTCGTGTA
+      10494700, // GGAAAGAGTGTA
+       1455980, // ACCGATCTCGTA
+      12245356, // GTGGTCGCCGTA
+       4297260, // CAACGCAGAGTA
+       4557020, // CACCGAGATCTA
+       1965404, // ACTCTTTCCCTA
+       8091596, // CTGTCTCTTATA
+       8974028, // GAGATGTGTATA
+       4779724, // CAGATGTGTATA
+      14577548, // TCTGCGTTGATA
+       8526412, // GAAGACGGCATA
+       1959672, // ACTCTGCGTTGA
+      14645752, // TCTTCTGCTTGA
+       9509752, // GCACACGTCTGA
+       3216248, // ATACACATCTGA
+       5201528, // CATTCCTGCTGA
+      11988136, // GTCGTGTAGGGA
+       7251816, // CGTGGGCTCGGA
+      11487528, // GGTTCAGCAGGA
+       3720856, // ATGATACGGCGA
+      12181336, // GTGCTCTTCCGA
+       5889880, // CCGCTCTTCCGA
+       1695576, // ACGCTCTTCCGA
+       9035608, // GAGCTCTTCCGA
+       1128280, // ACACATCTCCGA
+      14786136, // TGACGCTGCCGA
+       4853336, // CAGGAATGCCGA
+       8294744, // CTTGGCACCCGA
+      15042648, // TGCCGAGACCGA
+      10884184, // GGCGACCACCGA
+       8819480, // GACGGCATACGA
+       6788632, // CGCTGCCGACGA
+       5805336, // CCGAGCCCACGA
+      16085272, // TTCCCTACACGA
+        143048, // AAAGAGTGTAGA
+      15365768, // TGGGCTCGGAGA
+      10544520, // GGAATGCCGAGA
+      15275400, // TGGCACCCGAGA
+       6374792, // CGACCACCGAGA
+       6893960, // CGGCATACGAGA
+       8999304, // GAGCCCACGAGA
+      12267656, // GTGTATAAGAGA
+       8007496, // CTGGAGTTCAGA
+      10787656, // GGCAGCGTCAGA
+      13436488, // TATCAACGCAGA
+       3912456, // ATGTGTATAAGA
+      15509512, // TGTAGGGAAAGA
+        563956, // AAGAGCGGTTCA
+       8889076, // GACTGGAGTTCA
+      14305716, // TCGGCAGCGTCA
+       8508596, // GAACTCCAGTCA
+       9628468, // GCAGTGGTATCA
+      11950900, // GTCGCCGTATCA
+      14343588, // TCGGTCTCGGCA
+       4727204, // CAGAAGACGGCA
+      11325540, // GGTATCAACGCA
+       3572260, // ATCGGAAGAGCA
+       2538788, // AGCGGTTCAGCA
+       5800916, // CCGAGAATTCCA
+      12026324, // GTCTGAACTCCA
+      14508628, // TCTCCGAGCCCA
+      10216212, // GCGTTGATACCA
+       3253780, // ATACGGCGACCA
+       1647300, // ACGCAGAGTACA
+       5803460, // CCGAGATCTACA
+      14669252, // TCTTTCCCTACA
+      12025028, // GTCTCTTATACA
+      11733124, // GTATAAGAGACA
+       6824516, // CGGAAGAGCACA
+       7851076, // CTCTTATACACA
+       5977328, // CCGTATCATTAA
+       2341680, // AGATGTGTATAA
+       8251360, // CTTCTGCTTGAA
+       4484576, // CACACGTCTGAA
+       4028896, // ATTCCTGCTGAA
+      14398112, // TCGTGTAGGGAA
+      12395680, // GTTCAGCAGGAA
+      10769952, // GGCACCCGAGAA
+       5557280, // CCCATATAAGAA
+       4959440, // CAGTGGTATCAA
+       7132096, // CGTATCATTAAA
+      16228224, // TTCTGCTTGAAA
+       7260800, // CGTGTAGGGAAA
+       5451904, // CCATATAAGAAA
+      11751168, // GTATCATTAAAA
+      14581248, // TCTGCTTGAAAA
+      13450240, // TATCATTAAAAA
+       7993344, // CTGCTTGAAAAA
+       3469312, // ATCATTAAAAAA
+      15196160, // TGCTTGAAAAAA
+      13877248, // TCATTAAAAAAA
+      10452992, // GCTTGAAAAAAA
+       5177344, // CATTAAAAAAAA
+       8257536, // CTTGAAAAAAAA
+       3932160, // ATTAAAAAAAAA
+      16252928, // TTGAAAAAAAAA
+      15728640, // TTAAAAAAAAAA
+      14680064, // TGAAAAAAAAAA
+	};
+}
\ No newline at end of file
diff --git a/src/common/illumina_adapters_static.h b/src/common/illumina_adapters_static.h
new file mode 100644
index 0000000..b90a3d7
--- /dev/null
+++ b/src/common/illumina_adapters_static.h
@@ -0,0 +1,12 @@
+#ifndef _ILLUMINA_ADAPTERS_STATIC
+#define _ILLUMINA_ADAPTERS_STATIC
+#include <vector>
+#include <cinttypes>
+
+class IlluminaAdaptersStatic {
+	
+public:
+	static std::vector<uint64_t> Get12Mers();
+};
+
+#endif // !_ILLUMINA_ADAPTERS_STATIC
diff --git a/src/satc/kmc_api/kmc_file.cpp b/src/common/kmc_api/kmc_file.cpp
similarity index 100%
rename from src/satc/kmc_api/kmc_file.cpp
rename to src/common/kmc_api/kmc_file.cpp
diff --git a/src/satc/kmc_api/kmc_file.h b/src/common/kmc_api/kmc_file.h
similarity index 100%
rename from src/satc/kmc_api/kmc_file.h
rename to src/common/kmc_api/kmc_file.h
diff --git a/src/satc/kmc_api/kmer_api.cpp b/src/common/kmc_api/kmer_api.cpp
similarity index 100%
rename from src/satc/kmc_api/kmer_api.cpp
rename to src/common/kmc_api/kmer_api.cpp
diff --git a/src/satc/kmc_api/kmer_api.h b/src/common/kmc_api/kmer_api.h
similarity index 100%
rename from src/satc/kmc_api/kmer_api.h
rename to src/common/kmc_api/kmer_api.h
diff --git a/src/satc/kmc_api/kmer_defs.h b/src/common/kmc_api/kmer_defs.h
similarity index 100%
rename from src/satc/kmc_api/kmer_defs.h
rename to src/common/kmc_api/kmer_defs.h
diff --git a/src/satc/kmc_api/mmer.cpp b/src/common/kmc_api/mmer.cpp
similarity index 100%
rename from src/satc/kmc_api/mmer.cpp
rename to src/common/kmc_api/mmer.cpp
diff --git a/src/satc/kmc_api/mmer.h b/src/common/kmc_api/mmer.h
similarity index 100%
rename from src/satc/kmc_api/mmer.h
rename to src/common/kmc_api/mmer.h
diff --git a/src/common/satc_data.h b/src/common/satc_data.h
index 7124920..aa323d6 100644
--- a/src/common/satc_data.h
+++ b/src/common/satc_data.h
@@ -9,6 +9,12 @@
 
 #include "../../libs/refresh/zstd_file.h"
 
+#ifdef _WIN32
+#define _bswap64(x) _byteswap_uint64(x)
+#else
+#define _bswap64(x) __builtin_bswap64(x)
+#endif
+
 #define USE_ZSTD_FOR_TEMPS
 
 template<typename T>
@@ -271,6 +277,31 @@ bool LoadBigEndian(std::vector<uint8_t>::iterator& p, T& data, uint8_t data_size
 	return true;
 }
 
+inline uint32_t get_rev_compl_shift(uint32_t len)
+{
+	return (len + 31) / 32 * 32 - len;
+}
+
+//instead of x >> 2*p there is (x>>p) >> p, because
+//for p=32 we would have x >> 64 which is UB
+inline uint64_t shr_2p(uint64_t x, uint64_t p) {
+	return (x >> p) >> p;
+}
+
+inline uint64_t get_rev_compl(const uint64_t kmer, const uint32_t shift) {
+	uint64_t res;
+
+	const uint64_t mask4 = 0b0000111100001111000011110000111100001111000011110000111100001111ull;
+	const uint64_t mask2 = 0b0011001100110011001100110011001100110011001100110011001100110011ull;
+
+	uint64_t sf4 = ((kmer & mask4) << 4) + ((kmer >> 4) & mask4);
+	uint64_t sf2 = ((sf4 & mask2) << 2) + ((sf4 >> 2) & mask2);
+
+	res = shr_2p((~_bswap64(sf2)), shift);
+
+	return res;
+}
+
 inline uint64_t str_kmer_to_uint64_t(const std::string& kmer) {
 	uint64_t res{};
 	static auto mapping = []() {
@@ -351,14 +382,14 @@ struct Header {
 	}
 };
 
-enum class RecFmt { SATC, NOMAD };
+enum class RecFmt { SATC, SPLASH };
 struct RecFmtConv {
 
 	inline static RecFmt from_string(const std::string& str) {
 		if (str == "satc")
 			return RecFmt::SATC;
-		else if (str == "nomad")
-			return RecFmt::NOMAD;
+		else if (str == "splash")
+			return RecFmt::SPLASH;
 		else {
 			std::cerr << "Error: cannot convert \"" << str << "\" to RecordPrintFormat\n";
 			exit(1);
@@ -369,8 +400,8 @@ struct RecFmtConv {
 		{
 		case RecFmt::SATC:
 			return "satc";
-		case RecFmt::NOMAD:
-			return "nomad";
+		case RecFmt::SPLASH:
+			return "splash";
 		default:
 			std::cerr << "Error: unsupported, swich in RecordPrintFormat should be extended";
 			exit(1);
@@ -509,7 +540,7 @@ struct Record
 			oss << kmer_to_string(target, header.target_len_symbols) << "\t";
 			oss << count << "\n";
 		}
-		else if (format == RecFmt::NOMAD) {
+		else if (format == RecFmt::SPLASH) {
 			oss << count << " ";
 			oss << kmer_to_string(anchor, header.anchor_len_symbols);
 			oss << kmer_to_string(target, header.target_len_symbols) << " ";
diff --git a/src/common/target_count.h b/src/common/target_count.h
new file mode 100644
index 0000000..db0c5b5
--- /dev/null
+++ b/src/common/target_count.h
@@ -0,0 +1,16 @@
+#ifndef _TARGET_COUNT_H
+#define _TARGET_COUNT_H
+#include <cstdint>
+
+struct TargetCount {
+	uint64_t target;
+	uint32_t count;
+	TargetCount(uint64_t target, uint32_t count) :
+		target(target),
+		count(count)
+	{
+
+	}
+};
+
+#endif //_TARGET_COUNT_H
diff --git a/src/common/version.h b/src/common/version.h
index 8871377..de57976 100644
--- a/src/common/version.h
+++ b/src/common/version.h
@@ -1,7 +1,7 @@
 #pragma once
 
-#define NOMAD_VER "2.0.0"
+#define SPLASH_VER "2.1.4"
 
-inline void NOMAD_VER_PRINT(std::ostream& oss) {
-	oss << "Version: " << NOMAD_VER << "\n";
+inline void SPLASH_VER_PRINT(std::ostream& oss) {
+	oss << "Version: " << SPLASH_VER << "\n";
 }
diff --git a/src/satc/satc.cpp b/src/satc/satc.cpp
index 06bb4b1..8e78ef6 100644
--- a/src/satc/satc.cpp
+++ b/src/satc/satc.cpp
@@ -4,11 +4,15 @@
 #include <fstream>
 #include <algorithm>
 #include "../common/version.h"
-#include "kmc_api/kmer_api.h"
-#include "kmc_api/kmc_file.h"
+#include "../common/kmc_api/kmer_api.h"
+#include "../common/kmc_api/kmc_file.h"
 #include "../common/murmur64.h"
 #include "../common/satc_data.h"
 #include "../common/poly_ACGT_filter.h"
+#include "../common/artifacts_filter.h"
+#include "../common/hamming_filter.h"
+#include "../common/illumina_adapters_static.h"
+#include "../common/target_count.h"
 
 struct SampleDesc
 {
@@ -24,6 +28,9 @@ struct Params
 	uint64_t n_bins{};
 	uint64_t anchor_sample_counts_threshold{}; //keep only anchors with counts > anchor_sample_counts_threshold
 	uint64_t poly_ACGT_len{};
+	uint64_t min_hamming_threshold{};
+	std::string artifacts;
+	bool dont_filter_illumina_adapters = false;
 	std::string out_base;
 	void Print(std::ostream& oss) const
 	{
@@ -33,11 +40,14 @@ struct Params
 		oss << "outbase                        : " << out_base << "\n";
 		oss << "anchor_sample_counts_threshold : " << anchor_sample_counts_threshold << "\n";
 		oss << "poly_ACGT_len                  : " << poly_ACGT_len << "\n";
+		oss << "artifacts                      : " << artifacts << "\n";
+		oss << "dont_filter_illumina_adapters  : " << std::boolalpha << dont_filter_illumina_adapters << "\n";
+		oss << "min_hamming_threshold          : " << min_hamming_threshold << "\n";
 		oss << "input sample                   : " << input_sample.input_kmc_db_path << " " << input_sample.sample_id << "\n";
 	}
 	static void Usage(char* prog_name) {
 		std::cerr << "satc (sample anchor target count)\n";
-		NOMAD_VER_PRINT(std::cerr);
+		SPLASH_VER_PRINT(std::cerr);
 		std::cerr << "Usage: \n\t" << prog_name << " [options] <outbase> <input_path> <input_id>\n";
 		std::cerr
 			<< "Positional parameters:\n"
@@ -50,12 +60,17 @@ struct Params
 			<< "    --target_len <int> - target len\n"
 			<< "    --n_bins <int> - number of output bins\n"
 			<< "    --poly_ACGT_len <int> - all anchors containing polyACGT of this length will be filtered out (0 means no filtering)\n"
-			<< "    --anchor_sample_counts_threshold <int> - keep only anchors with counts > anchor_sample_counts_threshold\n";
+			<< "    --artifacts <string> - path to artifacts, each anchor containing artifact will be filtered out\n"
+			<< "    --dont_filter_illumina_adapters - if used anchors containing Illumina adapters will not be filtered out\n"
+			<< "    --anchor_sample_counts_threshold <int> - keep only anchors with counts > anchor_sample_counts_threshold\n"
+			<< "    --min_hamming_threshold <int> - keep only anchors with a pair of targets that differ by >= min_hamming_threshold\n";
 	}
 };
 
 struct Stats {
 	uint64_t tot_poly_filtered_out{};
+	uint64_t tot_artifacts_filtered_out{};
+	uint64_t tot_hamming_distance_filtered_out{};
 	uint64_t tot_cnt_threshold_filtered_out{};
 	uint64_t tot_unique_anchors{};
 	uint64_t tot_out_recs{};
@@ -63,6 +78,8 @@ struct Stats {
 
 	void print(std::ostream& oss) const {
 		oss << "# poly filtered anchors	               : " << tot_poly_filtered_out << "\n";
+		oss << "# artifacts filtered anchors	       : " << tot_artifacts_filtered_out << "\n";
+		oss << "# hamming distance filtered anchors    : " << tot_hamming_distance_filtered_out << "\n";
 		oss << "# filtered cnt threshold anchors       : " << tot_cnt_threshold_filtered_out << "\n";
 		oss << "# unique anchors                       : " << tot_unique_anchors << "\n";
 		oss << "# out recs                             : " << tot_out_recs << "\n";
@@ -105,6 +122,14 @@ Params read_params(int argc, char** argv)
 			std::string tmp = argv[++i];
 			res.poly_ACGT_len = std::stoull(tmp);
 		}
+		else if (param == "--min_hamming_threshold") {
+			std::string tmp = argv[++i];
+			res.min_hamming_threshold = std::stoull(tmp);
+		}
+		else if (param == "--artifacts")
+			res.artifacts = argv[++i];
+		else if (param == "--dont_filter_illumina_adapters")
+			res.dont_filter_illumina_adapters = true;
 	}
 	if (i >= argc) {
 		std::cerr << "Error: outbase missing\n";
@@ -160,17 +185,6 @@ void split(CKmerAPI& to_split, uint64_t& anchor, uint64_t& target, uint8_t ancho
 	target = to_split.subkmer(to_split.get_len() - target_len_symbols, target_len_symbols);
 }
 
-struct TargetCount {
-	uint64_t target;
-	uint32_t count;
-	TargetCount(uint64_t target, uint32_t count) :
-		target(target),
-		count(count)
-	{
-
-	}
-};
-
 void sort_merge_and_store(uint64_t anchor,
 						  std::vector<TargetCount>& targets_in_current_anchor,
 						  const Header& header,
@@ -178,6 +192,8 @@ void sort_merge_and_store(uint64_t anchor,
 						  std::vector<buffered_binary_writer>& bins,
 						  uint64_t anchor_sample_counts_threshold,
 						  const PolyACGTFilter& poly_ACGT_filter,
+						  const ArtifactsFilter& artifacts_filter,
+						  const HammingFilter& hamming_filter,
 						  Stats& stats) {
 	rec.anchor = anchor;
 	if (targets_in_current_anchor.empty())
@@ -192,13 +208,23 @@ void sort_merge_and_store(uint64_t anchor,
 	for (auto& x : targets_in_current_anchor)
 		tot_count += x.count;
 
+	if (tot_count <= anchor_sample_counts_threshold) {
+		++stats.tot_cnt_threshold_filtered_out;
+		return;
+	}
+
 	if (poly_ACGT_filter.IsPolyACGT(anchor, header.anchor_len_symbols)) {
 		++stats.tot_poly_filtered_out;
 		return;
 	}
 
-	if (tot_count <= anchor_sample_counts_threshold) {
-		++stats.tot_cnt_threshold_filtered_out;
+	if (artifacts_filter.ContainsArtifact(anchor, header.anchor_len_symbols)) {
+		++stats.tot_artifacts_filtered_out;
+		return;
+	}
+
+	if (!hamming_filter.ContainsDistantPair(targets_in_current_anchor)) {
+		++stats.tot_hamming_distance_filtered_out;
 		return;
 	}
 
@@ -233,6 +259,8 @@ void process_kmc_db(const std::string& path,
 					std::vector<buffered_binary_writer>& bins,
 					uint64_t anchor_sample_counts_threshold,
 					const PolyACGTFilter& poly_ACGT_filter,
+					const ArtifactsFilter& artifacts_filter,
+					const HammingFilter& hamming_filter,
 					Stats& stats)
 {
 	CKMCFile kmc_db;
@@ -279,7 +307,17 @@ void process_kmc_db(const std::string& path,
 		if (prev_anchor == anchor)
 			targets_in_current_anchor.emplace_back(target, count);
 		else {
-			sort_merge_and_store(prev_anchor, targets_in_current_anchor, header, rec, bins, anchor_sample_counts_threshold, poly_ACGT_filter, stats);
+			sort_merge_and_store(prev_anchor,
+				targets_in_current_anchor,
+				header,
+				rec,
+				bins,
+				anchor_sample_counts_threshold,
+				poly_ACGT_filter,
+				artifacts_filter,
+				hamming_filter,
+				stats);
+
 			targets_in_current_anchor.clear();
 			targets_in_current_anchor.emplace_back(target, count);
 			prev_anchor = anchor;
@@ -291,7 +329,16 @@ void process_kmc_db(const std::string& path,
 	}
 	std::cerr << "\n";
 
-	sort_merge_and_store(prev_anchor, targets_in_current_anchor, header, rec, bins, anchor_sample_counts_threshold, poly_ACGT_filter, stats);
+	sort_merge_and_store(prev_anchor,
+		targets_in_current_anchor,
+		header,
+		rec,
+		bins,
+		anchor_sample_counts_threshold,
+		poly_ACGT_filter,
+		artifacts_filter,
+		hamming_filter,
+		stats);
 	targets_in_current_anchor.clear();
 }
 
@@ -361,8 +408,21 @@ int main(int argc, char** argv)
 	Stats stats;
 
 	PolyACGTFilter poly_ACGT_filter(params.poly_ACGT_len);
-
-	process_kmc_db(params.input_sample.input_kmc_db_path, params.input_sample.sample_id, header, out_files, params.anchor_sample_counts_threshold, poly_ACGT_filter, stats);
+	ArtifactsFilter artifacts_filter(params.artifacts);
+	HammingFilter hamming_filter(params.min_hamming_threshold);
+
+	if (!params.dont_filter_illumina_adapters)
+		artifacts_filter.Add(12, IlluminaAdaptersStatic::Get12Mers());
+
+	process_kmc_db(params.input_sample.input_kmc_db_path,
+		params.input_sample.sample_id,
+		header,
+		out_files,
+		params.anchor_sample_counts_threshold,
+		poly_ACGT_filter,
+		artifacts_filter,
+		hamming_filter,
+		stats);
 
 	stats.print(std::cerr);
 }
diff --git a/src/satc_dump/satc_dump.cpp b/src/satc_dump/satc_dump.cpp
index 48cf535..e9a752b 100644
--- a/src/satc_dump/satc_dump.cpp
+++ b/src/satc_dump/satc_dump.cpp
@@ -30,7 +30,7 @@ struct Params {
 
 	static void Usage(char* prog_name) {
 		std::cerr << "satc_dump\n";
-		NOMAD_VER_PRINT(std::cerr);
+		SPLASH_VER_PRINT(std::cerr);
 		std::cerr << "Usage: \n\t" << prog_name << " [options] <input> <output>\n";
 		std::cerr << "or\n";
 		std::cerr << "\t" << prog_name << " --which-bin --n_bins <int> <anchor>\n";
@@ -42,7 +42,7 @@ struct Params {
 			<< "Options:\n"
 			<< "    --anchor_list <path>  - path to text file containing anchors separated by whitespaces, only anchors from this file will be dumped\n"
 			<< "    --sample_names <path> - path for decode sample id, each line should contain <sample_name> <sample_id>\n"
-			<< "    --format <string>     - output format, available options: satc, nomad (default: satc)\n"
+			<< "    --format <string>     - output format, available options: satc, splash (default: satc)\n"
 			<< "    --n_bins <int>        - if set to value different than 0 the input is interpreted as a list of bins (each bin in separate line, first list is bin_0, second line is bin_1, etc. (in case of ill-formed input results will be incorrect)\n"
 			<< "    --separately          - if set with n_bins != 0 output param will be treated as suffix name and there will be output for each bin\n";
 	}
diff --git a/src/satc_merge/extra_stats.cpp b/src/satc_merge/extra_stats.cpp
index 5f90a3f..cb6c39b 100644
--- a/src/satc_merge/extra_stats.cpp
+++ b/src/satc_merge/extra_stats.cpp
@@ -1,4 +1,5 @@
 #include "extra_stats.h"
+#include "../../libs/refresh/deterministic_random.h"
 #include <array>
 #include <random>
 #include <algorithm>
@@ -109,8 +110,8 @@ void CExtraStats::determine_monte_carlo_trials(size_t no_trials)
 	auto tot_count = cum_counts.back();
 
 	std::mt19937_64 mt;
-	std::uniform_int_distribution<> dist1(0, tot_count - 1);
-	std::uniform_int_distribution<> dist2(0, tot_count - 2);
+	det_uniform_int_distribution<> dist1(0, tot_count - 1);
+	det_uniform_int_distribution<> dist2(0, tot_count - 2);
 
 //	std::multiset<std::pair<uint64_t, uint64_t>> unq_s;		// !!! TODO: zmienic na unordered_multiset, albo od razu wyniki wpisywac do monte_carlo_trials
 
diff --git a/src/satc_merge/get_train_mtx.h b/src/satc_merge/get_train_mtx.h
index 9e440be..cc14210 100644
--- a/src/satc_merge/get_train_mtx.h
+++ b/src/satc_merge/get_train_mtx.h
@@ -9,53 +9,24 @@
 using std::cerr;
 using std::endl;
 
-namespace detail {
-	//my iteartor to not generate the range...
-	struct MyIt {
-
-		using iterator_category = std::random_access_iterator_tag;
-		using value_type = size_t;
-		using reference = value_type&;
-		using difference_type = size_t;
-
-		size_t v;
-		MyIt(size_t v) : v(v) {}
-
-
-		reference operator*() {
-			return v;
-		}
-		MyIt& operator++() {
-			++v;
-			return *this;
-		}
-		difference_type operator-(const MyIt& rhs) const {
-			return v - rhs.v;
-		}
+template<typename RNG>
+void my_sample(size_t max_val, std::vector<size_t>& dest, size_t to_select, RNG&& eng) {
+	dest.resize(max_val);
+	std::iota(dest.begin(), dest.end(), 0ull);
 
-		bool operator==(const MyIt& rhs) {
-			return v == rhs.v;
-		}
-	};
-}
+	for (size_t i = 0; i < to_select; ++i)
+		std::swap(dest[i], dest[i + eng() % (dest.size() - i)]);
 
-template<> struct std::iterator_traits<detail::MyIt>
-{
-	using iterator_category = typename detail::MyIt::iterator_category;
-	using value_type = typename detail::MyIt::value_type;
-	using difference_type = typename detail::MyIt::difference_type;
-};
-
-template<typename OutIter, typename RNG>
-void my_sample(size_t start, size_t end, OutIter dest, size_t N, RNG&& eng) {
-	std::sample(detail::MyIt(start), detail::MyIt(end), dest, N, eng);
+	dest.resize(to_select);
+	sort(dest.begin(), dest.end());
 }
 
 template<typename RNG>
-//refresh::matrix_sparse<uint32_t, double, refresh::matrix_col_major> get_train_mtx_2(const refresh::matrix_sparse<uint32_t, double, refresh::matrix_col_major>& X, double opt_train_fraction, RNG&& eng)
-refresh::matrix_sparse_compact<uint32_t, double, refresh::matrix_col_major> get_train_mtx_2(const refresh::matrix_sparse_compact<uint32_t, double, refresh::matrix_col_major>& X, double opt_train_fraction, RNG&& eng)
+refresh::matrix_sparse_compact<uint32_t, double, refresh::matrix_col_major> get_train_mtx_2(
+	const refresh::matrix_sparse_compact<uint32_t, double, refresh::matrix_col_major>& X,
+	double opt_train_fraction,
+	RNG&& eng)
 {
-	//refresh::matrix_sparse<uint32_t, double, refresh::matrix_col_major> res(X.rows(), X.cols());
 	refresh::matrix_sparse_compact<uint32_t, double, refresh::matrix_col_major> res(X.rows(), X.cols());
 
 	res.reserve(X.data_size());
@@ -72,9 +43,8 @@ refresh::matrix_sparse_compact<uint32_t, double, refresh::matrix_col_major> get_
 			tot_cs += p_col_end->second;
 
 		size_t to_select = tot_cs * opt_train_fraction;
-		indices.resize(to_select + 1);								// + 1 for a guard
-		my_sample(0, tot_cs, indices.begin(), to_select, eng);
-		indices.back() = std::numeric_limits<size_t>::max();		// guard
+		my_sample(tot_cs, indices, to_select, eng);
+		indices.push_back(std::numeric_limits<size_t>::max()); // guard
 
 		double cs = 0;
 
diff --git a/src/satc_merge/matrix_sparse_compact.h b/src/satc_merge/matrix_sparse_compact.h
index 3f7d657..0e11c55 100644
--- a/src/satc_merge/matrix_sparse_compact.h
+++ b/src/satc_merge/matrix_sparse_compact.h
@@ -123,6 +123,11 @@ template<typename I, typename T, unsigned ORDER> class matrix_sparse_compact
 		return *this;
 	}
 
+	bool operator==(const matrix_sparse_compact<I, T, ORDER>& x) const
+	{
+		return n_rows == x.n_rows && n_cols == x.n_cols && data == x.data;
+	}
+
 	~matrix_sparse_compact()
 	{}
 
@@ -351,7 +356,7 @@ template<typename I, typename T, unsigned ORDER> class matrix_sparse_compact
 			if (p_src1->first == p_src2->first)
 			{
 				elem.second -= p_src2->second;
-				if(elem.second != (T)0)
+				if (elem.second != (T)0)
 					*p_dest++ = elem;
 				++p_src1;
 				++p_src2;
@@ -390,6 +395,60 @@ template<typename I, typename T, unsigned ORDER> class matrix_sparse_compact
 		return *this;
 	}
 
+	matrix_sparse_compact<I, T, ORDER>& operator+=(matrix_sparse_compact<I, T, ORDER>& x)
+	{
+		data_t new_elems;
+
+		auto p_src1 = data.begin();
+		auto p_src2 = x.data.begin();
+		auto p_dest = data.begin();
+
+		auto p_end1 = data.end();
+		auto p_end2 = x.data.end();
+
+		while (p_src1 != p_end1 && p_src2 != p_end2)
+		{
+			auto elem = *p_src1;
+
+			if (p_src1->first == p_src2->first)
+			{
+				elem.second += p_src2->second;
+				if (elem.second != (T)0)
+					*p_dest++ = elem;
+				++p_src1;
+				++p_src2;
+			}
+			else if (p_src1->first < p_src2->first)
+			{
+				*p_dest++ = elem;
+				++p_src1;
+			}
+			else
+			{
+				new_elems.emplace_back(*p_src2);
+				++p_src2;
+			}
+		}
+
+		if (p_src1 != p_end1)
+			while (p_src1 != p_end1)
+				*p_dest++ = *p_src1++;
+		else
+			new_elems.insert(new_elems.end(), p_src2, p_end2);
+
+		data.erase(p_dest, p_end1);
+
+		if (!new_elems.empty())
+		{
+			data.insert(data.end(), new_elems.begin(), new_elems.end());
+
+			// TODO: consider using inplace_merge here
+			std::sort(data.begin(), data.end());
+		}
+
+		return *this;
+	}
+
 	template<typename X>
 	matrix_sparse_compact<I, T, ORDER> compact(std::vector<X>& preserve_rows, std::vector<X>& preserve_cols) const
 	{
diff --git a/src/satc_merge/pvals.cpp b/src/satc_merge/pvals.cpp
index 0620cf2..df2d905 100644
--- a/src/satc_merge/pvals.cpp
+++ b/src/satc_merge/pvals.cpp
@@ -5,6 +5,7 @@
 #include <cmath>
 #include <fstream>
 
+#include "../../libs/refresh/deterministic_random.h"
 #include "pvals.h"
 #include "get_train_mtx.h"
 #include "helmert_decomposition.h"
@@ -117,9 +118,6 @@ void normalize_vec_0_1(refresh::matrix_1d<double>& vec)
 		vec(i) = (vec(i) - _min) / dif;
 }
 
-// --is_10X --n_most_freq_targets 10 --anchor_count_threshold 50 --anchor_unique_targets_threshold 1 --anchor_samples_threshold 1 stats/bin_1.txt_new out/S10_cnts_1 out/S11_cnts_1 out/S12_cnts_1 out/S13_cnts_1
-// --is_10X  --n_most_freq_targets 10 --anchor_count_threshold 50 --anchor_unique_targets_threshold 1 --anchor_samples_threshold 1 stats/bin_1.txt_new c:/linux/nomad-pp-dev/example/S10_cnts_1.zst c:/linux/nomad-pp-dev/example/S11_cnts_1.zst c:/linux/nomad-pp-dev/example/S12_cnts_1.zst c:/linux/nomad-pp-dev/example/S13_cnts_1.zst
-//double effectSize_cts(const refresh::matrix_sparse<uint32_t, double, refresh::matrix_col_major>& X, const refresh::matrix_1d<double>& cOpt, const refresh::matrix_1d<double>& fOpt) {
 double effectSize_cts(const refresh::matrix_sparse_compact<uint32_t, double, refresh::matrix_col_major>& X, const refresh::matrix_1d<double>& cOpt, const refresh::matrix_1d<double>& fOpt) {
 	auto f_abs_sum = (X * abs(cOpt)).sum();
 
@@ -159,7 +157,7 @@ double effectSize_bin(const refresh::matrix_sparse_compact<uint32_t, double, ref
 	return fabs(refresh::dot_product(fOpt * X, cPos) / (X * cPos).sum() - refresh::dot_product(fOpt * X, cNeg) / (X * cNeg).sum());
 }
 
-double computeAsympNOMAD(const refresh::matrix_sparse_compact<uint32_t, double, refresh::matrix_col_major>& X, const refresh::matrix_1d<double>& cOpt, const refresh::matrix_1d<double>& fOpt)
+double computeAsympSPLASH(const refresh::matrix_sparse_compact<uint32_t, double, refresh::matrix_col_major>& X, const refresh::matrix_1d<double>& cOpt, const refresh::matrix_1d<double>& fOpt)
 {
 	if (cOpt.all_items_same())
 		return 1.0;
@@ -203,7 +201,7 @@ double computeAsympNOMAD(const refresh::matrix_sparse_compact<uint32_t, double,
 
 	// cdf = std::erfc(-x / std::sqrt(2)) / 2;
 
-	double pval = 2 * erfc(abs(normalizedTestStat) / sqrt(2)) / 2;
+	double pval = 2 * erfc(fabs(normalizedTestStat) / sqrt(2)) / 2;
 
 //	cout << pval << endl;
 
@@ -318,12 +316,12 @@ double altMaximize(const refresh::matrix_sparse_compact<uint32_t, double, refres
 //		auto f2 = 1.0 - f1;
 
 		calc_S.set_f(f1);
-		double v1 = abs(calc_S.mult_fT_Xtild_c());
+		double v1 = fabs(calc_S.mult_fT_Xtild_c());
 
 		f1.negate_sign_01();
 //		calc_S.set_f(f2);
 		calc_S.set_f(f1);
-		double v2 = abs(calc_S.mult_fT_Xtild_c());
+		double v2 = fabs(calc_S.mult_fT_Xtild_c());
 
 		if (v1 >= v2)
 		{
@@ -349,10 +347,10 @@ double altMaximize(const refresh::matrix_sparse_compact<uint32_t, double, refres
 		S = calc_S.mult_fT_Xtild_c();
 
 		if (S == Sold)
-			return abs(S);
+			return fabs(S);
 
-		if (i == n_iters - 1 && S != 0 && abs((S - Sold) / S) < 0.001)
-			return abs(S);
+		if (i == n_iters - 1 && S != 0 && fabs((S - Sold) / S) < 0.001)
+			return fabs(S);
 
 		Sold = S;
 	}
@@ -366,8 +364,8 @@ double altMaximize(const refresh::matrix_sparse_compact<uint32_t, double, refres
 //void generate_alt_max_cf(const refresh::matrix_sparse<uint32_t, double, refresh::matrix_col_major>& X_org, refresh::matrix_1d<double>& cOpt, refresh::matrix_1d<double>& fOpt, int no_tries, int opt_num_iters)
 void generate_alt_max_cf(const refresh::matrix_sparse_compact<uint32_t, double, refresh::matrix_col_major>& X_org, refresh::matrix_1d<double>& cOpt, refresh::matrix_1d<double>& fOpt, int no_tries, int opt_num_iters)
 {
-	std::default_random_engine eng;
-	std::uniform_int_distribution<int> dist_0_1(0, 1);
+	std::mt19937_64 eng;
+	det_uniform_int_distribution dist_0_1(0, 1);
 	double minus_1_and_1[] = { -1, 1 };
 
 	std::vector<uint64_t> non_zero_cols;
@@ -422,8 +420,8 @@ void generate_alt_max_cf(const refresh::matrix_sparse_compact<uint32_t, double,
 
 void generate_f_from_c(const refresh::matrix_sparse_compact<uint32_t, double, refresh::matrix_col_major>& X_org, const refresh::matrix_1d<double>& cOpt, refresh::matrix_1d<double>& fOpt)
 {
-	std::default_random_engine eng;
-	std::uniform_int_distribution<int> dist_0_1(0, 1);
+	std::mt19937_64 eng;
+	det_uniform_int_distribution dist_0_1(0, 1);
 
 	std::vector<uint64_t> non_zero_rows;
 
@@ -449,9 +447,9 @@ void generate_f_from_c(const refresh::matrix_sparse_compact<uint32_t, double, re
 	auto f2 = 1.0 - f1;
 
 	calc_S.set_f(f1);
-	double v1 = abs(calc_S.mult_fT_Xtild_c());
+	double v1 = fabs(calc_S.mult_fT_Xtild_c());
 	calc_S.set_f(f2);
-	double v2 = abs(calc_S.mult_fT_Xtild_c());
+	double v2 = fabs(calc_S.mult_fT_Xtild_c());
 
 	refresh::matrix_1d<double> fMax;
 
@@ -503,10 +501,12 @@ void get_most_freq_targets(
 	}
 }
 
+double calc_pval_base(
+	const refresh::matrix_sparse_compact<uint32_t, double, refresh::matrix_col_major>& sp_anch_contingency_table,
+	size_t num_rand_cf,
+	std::mt19937_64& eng) {
 
-//double calc_pval_base(const refresh::matrix_sparse<uint32_t, double, refresh::matrix_col_major>& sp_anch_contingency_table, size_t num_rand_cf, std::default_random_engine& eng) {
-double calc_pval_base(const refresh::matrix_sparse_compact<uint32_t, double, refresh::matrix_col_major>& sp_anch_contingency_table, size_t num_rand_cf, std::default_random_engine& eng) {
-	std::uniform_int_distribution<int> dist_0_1(0, 1);
+	det_uniform_int_distribution dist_0_1(0, 1);
 	double minus_1_and_1[] = { -1, 1 };
 	double zero_and_1[] = { 0, 1 };
 	double min_pval = std::numeric_limits<double>::max();
@@ -528,11 +528,16 @@ double calc_pval_base(const refresh::matrix_sparse_compact<uint32_t, double, ref
 	return pval_base;
 }
 
-//double calc_pval_base(const refresh::matrix_sparse<uint32_t, double, refresh::matrix_col_major>& sp_anch_contingency_table, size_t num_rand_cf, std::default_random_engine& eng) {
 // Extended version, returning also old_pval_base and fOptR and cOptR
-double calc_pval_base_ext(const refresh::matrix_sparse_compact<uint32_t, double, refresh::matrix_col_major>& sp_anch_contingency_table, size_t num_rand_cf, std::default_random_engine& eng,
-	double &pval_base_old, refresh::matrix_1d<double> &fOptR, refresh::matrix_1d<double> &cOptR) {
-	std::uniform_int_distribution<int> dist_0_1(0, 1);
+double calc_pval_base_ext(
+	const refresh::matrix_sparse_compact<uint32_t, double, refresh::matrix_col_major>& sp_anch_contingency_table,
+	size_t num_rand_cf,
+	std::mt19937_64& eng,
+	double &pval_base_old,
+	refresh::matrix_1d<double> &fOptR,
+	refresh::matrix_1d<double> &cOptR) {
+
+	det_uniform_int_distribution dist_0_1(0, 1);
 	double minus_1_and_1[] = { -1, 1 };
 	double zero_and_1[] = { 0, 1 };
 	double min_pval = std::numeric_limits<double>::max();
@@ -574,12 +579,14 @@ void compute_stats(
 	AnchorStats& anchor_stats,
 	bool without_alt_max,
 	bool with_effect_size_cts,
+	bool with_pval_asymp_opt,
 	bool compute_also_old_base_pvals,
 	uint32_t n_most_freq_targets,
 	double opt_train_fraction,
 	int opt_num_inits,
 	int opt_num_iters,
 	size_t num_rand_cf,
+	size_t num_splits,
 	CjWriter& cj_writer,
 	double max_pval_opt_for_Cjs,
 	CBCToCellType* cbc_to_cell_type,
@@ -596,8 +603,7 @@ void compute_stats(
 //	refresh::matrix_sparse<uint32_t, double, refresh::matrix_col_major> sp_anch_contingency_table(n_uniq_targets, n_unique_samples);
 	refresh::matrix_sparse_compact<uint32_t, double, refresh::matrix_col_major> sp_anch_contingency_table(n_uniq_targets, n_unique_samples);
 
-	std::default_random_engine eng;
-	//std::default_random_engine eng(std::random_device{}()); 
+	std::mt19937_64 eng;
 
 	SampleToColMapper mapper(unique_samples);
 
@@ -614,7 +620,7 @@ void compute_stats(
 
 	sp_anch_contingency_table.reserve(anchor.data.size());
 
-		size_t row_id = 0;
+	size_t row_id = 0;
 	for (const auto& e : anchor.data) {
 		if (target != e.target) { //new target -> new row
 			++row_id;
@@ -674,13 +680,55 @@ void compute_stats(
 
 		anchor_stats.pval_opt = testPval(Xtest, cOpt, fOpt);
 
+		if (with_pval_asymp_opt)
+			anchor_stats.pval_asymp_opt = computeAsympSPLASH(Xtest, cOpt, fOpt);
+
 		anchor_stats.effect_size_bin = effectSize_bin(Xtest, cOpt, fOpt);
 
 		if (with_effect_size_cts)
 			anchor_stats.effect_size_cts = effectSize_cts(Xtest, cOpt, fOpt);
 
-		anchor_stats.pval_asymp_base = computeAsympNOMAD(Xtest, cOpt, fOpt);
+		if (num_splits > 1) {
+			//I create new cOpt because in the cOpt I will have the optimal one, which may be needed to print it
+			refresh::matrix_1d<double> new_cOpt(Xtrain.cols());
 
+			for (size_t split_no = 1; split_no < num_splits; ++split_no)
+			{
+				//reconstruct contingency table
+				sp_anch_contingency_table = std::move(Xtest += Xtrain);
+
+				//make a new split
+				Xtrain = get_train_mtx_2(sp_anch_contingency_table, opt_train_fraction, eng);
+				sp_anch_contingency_table -= Xtrain;
+				auto Xtest = std::move(sp_anch_contingency_table);
+
+				generate_alt_max_cf(Xtrain, new_cOpt, fOpt, opt_num_inits, opt_num_iters);
+
+				if (with_pval_asymp_opt) {
+					auto new_pval_asymp_opt = computeAsympSPLASH(Xtest, new_cOpt, fOpt);
+					if (new_pval_asymp_opt < anchor_stats.pval_asymp_opt)
+						anchor_stats.pval_asymp_opt = new_pval_asymp_opt;
+				}
+
+				auto new_pval_opt = testPval(Xtest, new_cOpt, fOpt);
+				if (new_pval_opt < anchor_stats.pval_opt)
+				{
+					anchor_stats.pval_opt = new_pval_opt;
+					anchor_stats.effect_size_bin = effectSize_bin(Xtest, new_cOpt, fOpt);
+
+					if (with_effect_size_cts)
+						anchor_stats.effect_size_cts = effectSize_cts(Xtest, new_cOpt, fOpt);
+
+					if (cj_writer)
+						cOpt = new_cOpt;
+				}
+			}
+
+			anchor_stats.pval_opt *= num_splits;
+			if (with_pval_asymp_opt) {
+				anchor_stats.pval_asymp_opt *= num_splits;
+			}
+		}
 		if (cj_writer && anchor_stats.pval_opt <= max_pval_opt_for_Cjs)
 		{
 			for (size_t col_id = 0; col_id < cOpt.size(); ++col_id)
diff --git a/src/satc_merge/pvals.h b/src/satc_merge/pvals.h
index a4d7738..8c5019a 100644
--- a/src/satc_merge/pvals.h
+++ b/src/satc_merge/pvals.h
@@ -21,7 +21,7 @@ struct AnchorStats {
 	double effect_size_cts;
 	double effect_size_bin;
 	double effect_size_bin_old;
-	double pval_asymp_base;
+	double pval_asymp_opt;
 	double entropy;
 	double avg_no_homopolymer_targets;
 	double avg_hamming_distance_max_target;
@@ -43,7 +43,7 @@ struct AnchorStats {
 	std::vector<uint32_t> cell_types_ids;  //unique ids of cell types for a given Xtrain contignency table
 
 	AnchorStats(double pval_base, double pval_base_old, double pval_opt,
-		double effect_size_cts, double effect_size_bin, double effect_size_bin_old, double pval_asymp_base, double entropy,
+		double effect_size_cts, double effect_size_bin, double effect_size_bin_old, double pval_asymp_opt, double entropy,
 		double avg_no_homopolymer_targets,
 		double avg_hamming_distance_max_target, double avg_hamming_distance_all_pairs,
 		double avg_edit_distance_max_target, double avg_edit_distance_all_pairs
@@ -54,7 +54,7 @@ struct AnchorStats {
 		effect_size_cts(effect_size_cts),
 		effect_size_bin(effect_size_bin),
 		effect_size_bin_old(effect_size_bin_old),
-		pval_asymp_base(pval_asymp_base),
+		pval_asymp_opt(pval_asymp_opt),
 		entropy(entropy),
 		avg_no_homopolymer_targets(avg_no_homopolymer_targets),
 		avg_hamming_distance_max_target(avg_hamming_distance_max_target),
@@ -70,7 +70,7 @@ struct AnchorStats {
 		effect_size_cts(0),
 		effect_size_bin(0),
 		effect_size_bin_old(0),
-		pval_asymp_base(1),
+		pval_asymp_opt(1),
 		entropy(0),
 		avg_no_homopolymer_targets(0),
 		avg_hamming_distance_max_target(0),
@@ -322,12 +322,14 @@ void compute_stats(
 	AnchorStats &anchor_stats,
 	bool without_alt_max,
 	bool with_effect_size_cts,
+	bool with_pval_asymp_opt,
 	bool compute_also_old_base_pvals,
 	uint32_t n_most_freq_targets,
 	double opt_train_fraction,
 	int opt_num_inits,
 	int opt_num_iters,
 	size_t num_rand_cf,
+	size_t num_splits,
 	CjWriter& cj_writer,
 	double max_pval_opt_for_Cjs,
 	CBCToCellType* cbc_to_cell_type,
diff --git a/src/satc_merge/satc_merge.cpp b/src/satc_merge/satc_merge.cpp
index 226f6f3..79bffc1 100644
--- a/src/satc_merge/satc_merge.cpp
+++ b/src/satc_merge/satc_merge.cpp
@@ -33,37 +33,6 @@ class Timer {
 	}
 };
 
-//merge all samples from single bin
-enum class RunMode { CalcStats, JustMerge, JustMergeAndDump };
-std::string to_string(RunMode run_mode) {
-	switch (run_mode)
-	{
-	case RunMode::CalcStats:
-		return "calc_stats";
-	case RunMode::JustMerge:
-		return "just_merge";
-	case RunMode::JustMergeAndDump:
-		return "just_merge_and_dump";
-	default:
-		std::cerr << "Error: unsupported RunMode\n";
-		exit(1);
-		break;
-	}
-}
-
-RunMode run_mode_from_string(const std::string& run_mode) {
-	if (run_mode == "calc_stats")
-		return RunMode::CalcStats;
-	else if (run_mode == "just_merge")
-		return RunMode::JustMerge;
-	else if (run_mode == "just_merge_and_dump")
-		return RunMode::JustMergeAndDump;
-	else {
-		std::cerr << "Error: unknown run mode " << run_mode << "\n";
-		exit(1);
-	}
-}
-
 struct Params
 {
 	uint64_t anchor_count_threshold{}; //remove all anchors for which the sum of counters across all samples and targets is <= anchor_count_threshold
@@ -84,10 +53,14 @@ struct Params
 
 	size_t num_rand_cf = 50;
 
+	size_t num_splits = 1;
+
 	bool without_alt_max = false;
 
 	bool with_effect_size_cts = false;
 
+	bool with_pval_asymp_opt = false;
+
 	bool compute_also_old_base_pvals = false;
 
 	std::string outpath;
@@ -102,7 +75,9 @@ struct Params
 
 	std::vector<std::string> bins;
 
-	RunMode run_mode = RunMode::CalcStats;
+	std::string dump_sample_anchor_target_count_txt;
+	std::string dump_sample_anchor_target_count_binary;
+
 	RecFmt format = RecFmt::SATC; //only for JustMergeAndDump
 
 	std::string cell_type_samplesheet;
@@ -110,30 +85,31 @@ struct Params
 
 	void print(std::ostream& oss) {
 		oss << "Parameters:\n";
-		oss << "\tanchor_count_threshold             : " << anchor_count_threshold << "\n";
-		oss << "\tanchor_unique_targets_threshold    : " << anchor_unique_targets_threshold << "\n";
-		oss << "\tanchor_samples_threshold           : " << anchor_samples_threshold << "\n";
-		oss << "\tis_10X                             : " << std::boolalpha << is_10X << "\n";
-		oss << "\twithout_alt_max                    : " << std::boolalpha << without_alt_max << "\n";
-		oss << "\twith_effect_size_cts               : " << std::boolalpha << with_effect_size_cts << "\n";
-		oss << "\tcompute_also_old_base_pvals        : " << std::boolalpha << compute_also_old_base_pvals << "\n";
-		oss << "\toutpath                            : " << outpath << "\n";
-		oss << "\tanchor_list                        : " << anchor_list << "\n";
-		oss << "\tcjs_out                            : " << cjs_out << "\n";
-		oss << "\tmax_pval_opt_for_Cjs               : " << max_pval_opt_for_Cjs << "\n";
-		oss << "\tsample_names	                     : " << sample_names << "\n";
-		oss << "\tn_most_freq_targets                : " << n_most_freq_targets << "\n";
-		oss << "\tn_most_freq_targets_for_stats      : " << n_most_freq_targets_for_stats << "\n";
-		oss << "\topt_train_fraction                 : " << opt_train_fraction << "\n";
-		oss << "\topt_num_inits                      : " << opt_num_inits << "\n";
-		oss << "\topt_num_iters                      : " << opt_num_iters << "\n";
-		oss << "\tnum_rand_cf                        : " << num_rand_cf << "\n";
-		oss << "\tcell_type_samplesheet              : " << cell_type_samplesheet << "\n";
-		oss << "\tCjs_samplesheet                    : " << Cjs_samplesheet << "\n";
-
-		oss << "\trun_mode                           : " << to_string(run_mode) << "\n";
-		if (run_mode == RunMode::JustMergeAndDump)
-			oss << "\t\t format					       : " << RecFmtConv::to_string(format) << "\n";
+		oss << "\tanchor_count_threshold                  : " << anchor_count_threshold << "\n";
+		oss << "\tanchor_unique_targets_threshold         : " << anchor_unique_targets_threshold << "\n";
+		oss << "\tanchor_samples_threshold                : " << anchor_samples_threshold << "\n";
+		oss << "\tis_10X                                  : " << std::boolalpha << is_10X << "\n";
+		oss << "\twithout_alt_max                         : " << std::boolalpha << without_alt_max << "\n";
+		oss << "\twith_effect_size_cts                    : " << std::boolalpha << with_effect_size_cts << "\n";
+		oss << "\twith_pval_asymp_opt                     : " << std::boolalpha << with_pval_asymp_opt << "\n";
+		oss << "\tcompute_also_old_base_pvals             : " << std::boolalpha << compute_also_old_base_pvals << "\n";
+		oss << "\toutpath                                 : " << outpath << "\n";
+		oss << "\tanchor_list                             : " << anchor_list << "\n";
+		oss << "\tcjs_out                                 : " << cjs_out << "\n";
+		oss << "\tmax_pval_opt_for_Cjs                    : " << max_pval_opt_for_Cjs << "\n";
+		oss << "\tsample_names	                          : " << sample_names << "\n";
+		oss << "\tn_most_freq_targets                     : " << n_most_freq_targets << "\n";
+		oss << "\tn_most_freq_targets_for_stats           : " << n_most_freq_targets_for_stats << "\n";
+		oss << "\topt_train_fraction                      : " << opt_train_fraction << "\n";
+		oss << "\topt_num_inits                           : " << opt_num_inits << "\n";
+		oss << "\topt_num_iters                           : " << opt_num_iters << "\n";
+		oss << "\tnum_rand_cf                             : " << num_rand_cf << "\n";
+		oss << "\tnum_splits                              : " << num_splits << "\n";
+		oss << "\tcell_type_samplesheet                   : " << cell_type_samplesheet << "\n";
+		oss << "\tCjs_samplesheet                         : " << Cjs_samplesheet << "\n";
+		oss << "\t dump_sample_anchor_target_count_txt    : " << dump_sample_anchor_target_count_txt << "\n";
+		oss << "\t dump_sample_anchor_target_count_binary : " << dump_sample_anchor_target_count_binary << "\n";
+		oss << "\t format                                 : " << RecFmtConv::to_string(format) << "\n";
 		oss << "\tinput bins:\n";
 		for (const auto& bin : bins)
 			oss << "\t\t" << bin << "\n";
@@ -141,7 +117,7 @@ struct Params
 
 	static void Usage(char* prog_name) {
 		std::cerr << "satc_merge\n";
-		NOMAD_VER_PRINT(std::cerr);
+		SPLASH_VER_PRINT(std::cerr);
 		std::cerr << "Usage: \n\t" << prog_name << " [options] <path> <outpath> <list_of_bins_to_merge>\n";
 		std::cerr
 			<< "Positional parameters:\n"
@@ -149,27 +125,30 @@ struct Params
 			<< "    <file_with_list_of_bins_to_merge>   - file with list of paths of bins to be processed\n";
 		std::cerr
 			<< "Options:\n"
-			<< "    --is_10X                                      - if used input treat as 10X data\n"// text file containing anchors separated by whitespaces, only anchors from this file will be dumped\n";
-			<< "    --anchor_count_threshold <int>                - filter out all anchors for which the total count <= anchor_count_threshold\n"
-			<< "    --anchor_unique_targets_threshold <int>       - filter out all anchors for which the number of unique targets is <= anchor_unique_targets_threshold\n"
-			<< "    --n_most_freq_targets <int>                   - in calc_stats mode output also n_most_freq_targets most frequent targets and their counts (default: 0)\n"
-			<< "    --n_most_freq_targets_for_stats <int>         - in calc_stats mode use at most n_most_freq_targets_for_stats for each contignency table, 0 means use all (default: 0)\n"
-			<< "    --opt_train_fraction <double> (0.0;1.0)       - in calc_stats mode use this fraction to create train X from contingency table\n"
-			<< "    --opt_num_inits <int>                         - in calc_stats mode the number of altMaximize runs (default: 10)\n"
-			<< "    --opt_num_iters <int>                         - in calc_stats mode the number of iteration in altMaximize (default: 50)\n"
-			<< "    --num_rand_cf <int>                           - in calc_stats mode the number of rand cf (default: 50)\n"
-			<< "    --anchor_samples_threshold <int>              - filter out all anchors for which the number of unique samples is <= anchor_samples_threshold\n"
-			<< "    --anchor_list <string>                        - path to text file containing anchors separated by whitespaces, only anchors from this file will be processed\n"
-			<< "    --cjs_out <string>                            - path to output text file where Cjs will be stored\n"
-			<< "    --max_pval_opt_for_Cjs <double>               - dump only Cjs for anchors that have pval_opt <= max_pval_opt_for_Cjs\n"
-			<< "    --run_mode <string>                           - what to do with merged anchors, available options: calc_stats, just_merge, just_merge_and_dump (default: calc_stats)\n"
-			<< "    --without_alt_max                             - disable alt max computation\n"
-			<< "    --with_effect_size_cts                        - compute effect_size_cts\n"
-			<< "    --compute_also_old_base_pvals                 - compute old base pvals\n"
-			<< "    --sample_names <path>                         - path for decode sample id, each line should contain <sample_name> <sample_id>\n"
-			<< "    --cell_type_samplesheet <path>                - path for mapping barcode to cell type, is used Helmert-based supervised mode is turned on\n"
-			<< "    --Cjs_samplesheet <path>                      - path for file with predefined Cjs for non-10X supervised mode\n"
-			<< "    --format <string>                             - output format when 'just_merge_and_dump' is used as run_mode, available options: satc, nomad (default: satc)\n";
+			<< "    --is_10X                                          - if used input treat as 10X data\n"// text file containing anchors separated by whitespaces, only anchors from this file will be dumped\n";
+			<< "    --anchor_count_threshold <int>                    - filter out all anchors for which the total count <= anchor_count_threshold\n"
+			<< "    --anchor_unique_targets_threshold <int>           - filter out all anchors for which the number of unique targets is <= anchor_unique_targets_threshold\n"
+			<< "    --n_most_freq_targets <int>                       - output also n_most_freq_targets most frequent targets and their counts (default: 0)\n"
+			<< "    --n_most_freq_targets_for_stats <int>             - use at most n_most_freq_targets_for_stats for each contingency table, 0 means use all (default: 0)\n"
+			<< "    --opt_train_fraction <double> (0.0;1.0)           - use this fraction to create train X from contingency table\n"
+			<< "    --opt_num_inits <int>                             - the number of altMaximize runs (default: 10)\n"
+			<< "    --opt_num_iters <int>                             - the number of iteration in altMaximize (default: 50)\n"
+			<< "    --num_rand_cf <int>                               - the number of rand cf (default: 50)\n"
+			<< "    --num_splits <int>                                - the number of contingency table splits (default: 1)\n"
+			<< "    --anchor_samples_threshold <int>                  - filter out all anchors for which the number of unique samples is <= anchor_samples_threshold\n"
+			<< "    --anchor_list <string>                            - path to text file containing anchors separated by whitespaces, only anchors from this file will be processed\n"
+			<< "    --cjs_out <string>                                - path to output text file where Cjs will be stored\n"
+			<< "    --max_pval_opt_for_Cjs <double>                   - dump only Cjs for anchors that have pval_opt <= max_pval_opt_for_Cjs\n"
+			<< "    --dump_sample_anchor_target_count_txt <string>    - dump merged anchors in textual representation\n"
+			<< "    --dump_sample_anchor_target_count_binary <string> - dump merged anchors in textual satc format\n"
+			<< "    --without_alt_max                                 - disable alt max computation\n"
+			<< "    --with_effect_size_cts                            - compute effect_size_cts\n"
+			<< "    --with_pval_asymp_opt                             - compute pval_asymp_opt\n"
+			<< "    --compute_also_old_base_pvals                     - compute old base pvals\n"
+			<< "    --sample_names <path>                             - path for decode sample id, each line should contain <sample_name> <sample_id>\n"
+			<< "    --cell_type_samplesheet <path>                    - path for mapping barcode to cell type, is used Helmert-based supervised mode is turned on\n"
+			<< "    --Cjs_samplesheet <path>                          - path for file with predefined Cjs for non-10X supervised mode\n"
+			<< "    --format <string>                                 - output format when txt dump, available options: satc, splash (default: satc)\n";
 	}
 };
 
@@ -246,12 +225,19 @@ Params read_params(int argc, char** argv)
 			std::string tmp = argv[++i];
 			res.num_rand_cf = std::stoull(tmp);
 		}
+		if (param == "--num_splits") {
+			std::string tmp = argv[++i];
+			res.num_splits = std::stoull(tmp);
+		}
 		if (param == "--without_alt_max") {
 			res.without_alt_max = true;
 		}
 		if (param == "--with_effect_size_cts") {
 			res.with_effect_size_cts = true;
 		}
+		if (param == "--with_pval_asymp_opt") {
+			res.with_pval_asymp_opt = true;
+		}
 		if (param == "--compute_also_old_base_pvals") {
 			res.compute_also_old_base_pvals = true;
 		}
@@ -277,9 +263,13 @@ Params read_params(int argc, char** argv)
 		if (param == "--is_10X") {
 			res.is_10X = true;
 		}
-		if (param == "--run_mode") {
-			res.run_mode = run_mode_from_string(argv[++i]);
+		if (param == "--dump_sample_anchor_target_count_txt") {
+			res.dump_sample_anchor_target_count_txt = argv[++i];
 		}
+		if (param == "--dump_sample_anchor_target_count_binary") {
+			res.dump_sample_anchor_target_count_binary = argv[++i];
+		}
+
 		if (param == "--format")
 			res.format = RecFmtConv::from_string(argv[++i]);
 	}
@@ -426,7 +416,7 @@ class Bin {
 };
 
 
-void write_out_header(std::ofstream& out, bool without_alt_max, bool with_effect_size_cts, bool compute_also_old_base_pvals, bool is_removing_least_freq_targets_enabled, uint32_t n_most_freq_targets, CBCToCellType* cbc_to_cell_type, Non10XSupervised* non_10X_supervised) {
+void write_out_header(std::ofstream& out, bool without_alt_max, bool with_effect_size_cts, bool with_pval_asymp_opt, bool compute_also_old_base_pvals, bool is_removing_least_freq_targets_enabled, uint32_t n_most_freq_targets, CBCToCellType* cbc_to_cell_type, Non10XSupervised* non_10X_supervised) {
 	//mkokot_TODO: czy doimplementowac zakomentowane?
 	out
 		<< "anchor" << "\t";
@@ -463,8 +453,10 @@ void write_out_header(std::ofstream& out, bool without_alt_max, bool with_effect
 	}
 	if (!without_alt_max) {
 		out
-			<< "effect_size_bin" << "\t"
-			<< "pval_asymp_base" << "\t";
+			<< "effect_size_bin" << "\t";
+
+		if (with_pval_asymp_opt)
+			out << "pval_asymp_opt" << "\t";
 	}
 	out
 		//<< "effect_size_cts_SVD" << "\t"					// TODO: ?
@@ -512,6 +504,7 @@ void write_out_rec(
 	uint64_t n_unique_samples,
 	bool without_alt_max,
 	bool with_effect_size_cts,
+	bool with_pval_asymp_opt,
 	bool compute_also_old_base_pvals,
 	bool is_removing_least_freq_targets_enabled,
 	uint32_t n_most_freq_targets,
@@ -595,9 +588,9 @@ void write_out_rec(
 		out << anchor_stats.Cjs_num_rest << "\t";
 	}
 	if (!without_alt_max) {
-		out
-			<< anchor_stats.effect_size_bin << "\t"
-			<< anchor_stats.pval_asymp_base << "\t";
+		out << anchor_stats.effect_size_bin << "\t";
+		if (with_pval_asymp_opt)
+			out << anchor_stats.pval_asymp_opt << "\t";
 	}
 	out
 		//<< "effect_size_cts_SVD" << "\t"					// TODO: ?
@@ -662,6 +655,7 @@ class StatsWriter : public IAnchorProcessor {
 	std::ofstream out;
 	bool without_alt_max;
 	bool with_effect_size_cts;
+	bool with_pval_asymp_opt;
 	bool compute_also_old_base_pvals;
 	bool is_removing_least_freq_targets_enabled;
 	uint32_t n_most_freq_targets;
@@ -669,6 +663,7 @@ class StatsWriter : public IAnchorProcessor {
 	int opt_num_inits;
 	int opt_num_iters;
 	size_t num_rand_cf;
+	size_t num_splits;
 	CExtraStats extra_stats;
 	AnchorStats anchor_stats;
 	CjWriter cj_writer;
@@ -684,6 +679,7 @@ class StatsWriter : public IAnchorProcessor {
 		const std::string& outpath,
 		bool without_alt_max,
 		bool with_effect_size_cts,
+		bool with_pval_asymp_opt,
 		bool compute_also_old_base_pvals,
 		bool is_removing_least_freq_targets_enabled,
 		uint32_t n_most_freq_targets,
@@ -691,6 +687,7 @@ class StatsWriter : public IAnchorProcessor {
 		int opt_num_inits,
 		int opt_num_iters,
 		size_t num_rand_cf,
+		size_t num_splits,
 		const std::string& cjs_out,
 		bool is_10X,
 		size_t anchor_len_symbols,
@@ -703,6 +700,7 @@ class StatsWriter : public IAnchorProcessor {
 //		out(outpath),
 		without_alt_max(without_alt_max),
 		with_effect_size_cts(with_effect_size_cts),
+		with_pval_asymp_opt(with_pval_asymp_opt),
 		compute_also_old_base_pvals(compute_also_old_base_pvals),
 		is_removing_least_freq_targets_enabled(is_removing_least_freq_targets_enabled),
 		n_most_freq_targets(n_most_freq_targets),
@@ -710,6 +708,7 @@ class StatsWriter : public IAnchorProcessor {
 		opt_num_inits(opt_num_inits),
 		opt_num_iters(opt_num_iters),
 		num_rand_cf(num_rand_cf),
+		num_splits(num_splits),
 		cj_writer(cjs_out, is_10X, anchor_len_symbols, barcode_len_symbols, sample_names),
 		max_pval_opt_for_Cjs(max_pval_opt_for_Cjs),
 		cbc_to_cell_type(cbc_to_cell_type),
@@ -726,6 +725,7 @@ class StatsWriter : public IAnchorProcessor {
 			out,
 			without_alt_max,
 			with_effect_size_cts,
+			with_pval_asymp_opt,
 			compute_also_old_base_pvals,
 			is_removing_least_freq_targets_enabled,
 			n_most_freq_targets,
@@ -764,12 +764,14 @@ class StatsWriter : public IAnchorProcessor {
 					anchor_stats,
 					without_alt_max,
 					with_effect_size_cts,
+					with_pval_asymp_opt,
 					compute_also_old_base_pvals,
 					n_most_freq_targets,
 					opt_train_fraction,
 					opt_num_inits,
 					opt_num_iters,
 					num_rand_cf,
+					num_splits,
 					cj_writer,
 					max_pval_opt_for_Cjs,
 					cbc_to_cell_type,
@@ -788,6 +790,7 @@ class StatsWriter : public IAnchorProcessor {
 			unique_samples.size(),
 			without_alt_max,
 			with_effect_size_cts,
+			with_pval_asymp_opt,
 			compute_also_old_base_pvals,
 			is_removing_least_freq_targets_enabled,
 			n_most_freq_targets,
@@ -830,17 +833,7 @@ class SatcWriter : public IAnchorProcessor {
 		out_header.serialize(out);
 	}
 
-	void ProcessAnchor(
-		Anchor&& anchor,
-		bool is_10X,
-		size_t anchor_len_symbols,
-		size_t target_len_symbols,
-		size_t n_uniqe_targets,
-		size_t tot_cnt,
-		size_t n_uniqe_targets_before_filter,
-		size_t tot_cnt_before_filter,
-		const std::unordered_set<uint64_t>& unique_samples
-	)  override {
+	void ProcessAnchor(const Anchor& anchor) {
 		Record rec;
 		rec.anchor = anchor.anchor;
 
@@ -852,6 +845,20 @@ class SatcWriter : public IAnchorProcessor {
 
 			rec.serialize(out, out_header);
 		}
+	}
+
+	void ProcessAnchor(
+		Anchor&& anchor,
+		bool is_10X,
+		size_t anchor_len_symbols,
+		size_t target_len_symbols,
+		size_t n_uniqe_targets,
+		size_t tot_cnt,
+		size_t n_uniqe_targets_before_filter,
+		size_t tot_cnt_before_filter,
+		const std::unordered_set<uint64_t>& unique_samples
+	)  override {
+		ProcessAnchor(anchor);
 
 		anchor.data.clear();
 		anchor.data.shrink_to_fit();
@@ -895,17 +902,9 @@ class SatcDumpWriter : public IAnchorProcessor {
 		header.target_len_symbols = target_len_symbols;
 		header.gap_len_symbols = gap_len_symbols;
 	}
-	void ProcessAnchor(
-		Anchor&& anchor,
-		bool is_10X,
-		size_t anchor_len_symbols,
-		size_t target_len_symbols,
-		size_t n_uniqe_targets,
-		size_t tot_cnt,
-		size_t n_uniqe_targets_before_filter,
-		size_t tot_cnt_before_filter,
-		const std::unordered_set<uint64_t>& unique_samples
-	) override {
+
+	void ProcessAnchor(const Anchor& anchor)
+	{
 		Record rec;
 		rec.anchor = anchor.anchor;
 
@@ -917,12 +916,75 @@ class SatcDumpWriter : public IAnchorProcessor {
 
 			rec.print(out, header, format, sample_name_decoder);
 		}
+	}
+
+	void ProcessAnchor(
+		Anchor&& anchor,
+		bool is_10X,
+		size_t anchor_len_symbols,
+		size_t target_len_symbols,
+		size_t n_uniqe_targets,
+		size_t tot_cnt,
+		size_t n_uniqe_targets_before_filter,
+		size_t tot_cnt_before_filter,
+		const std::unordered_set<uint64_t>& unique_samples
+	) override {
+		ProcessAnchor(anchor);
 
 		anchor.data.clear();
 		anchor.data.shrink_to_fit();
 	}
 };
 
+class CombinedAnchorWriter : public IAnchorProcessor {
+	std::unique_ptr<SatcDumpWriter> satc_dump_writer;
+	std::unique_ptr<SatcWriter> satc_writer;
+	std::unique_ptr<StatsWriter> stats_writer;
+
+public:
+	CombinedAnchorWriter(
+		std::unique_ptr<SatcDumpWriter>&& satc_dump_writer,
+		std::unique_ptr<SatcWriter>&& satc_writer,
+		std::unique_ptr<StatsWriter>&& stats_writer)
+		:
+		satc_dump_writer(std::move(satc_dump_writer)),
+		satc_writer(std::move(satc_writer)),
+		stats_writer(std::move(stats_writer))
+	{
+
+	}
+
+	void ProcessAnchor(
+		Anchor&& anchor,
+		bool is_10X,
+		size_t anchor_len_symbols,
+		size_t target_len_symbols,
+		size_t n_uniqe_targets,
+		size_t tot_cnt,
+		size_t n_uniqe_targets_before_filter,
+		size_t tot_cnt_before_filter,
+		const std::unordered_set<uint64_t>& unique_samples
+	)  override {
+		if (satc_dump_writer)
+			satc_dump_writer->ProcessAnchor(anchor);
+		if (satc_writer)
+			satc_writer->ProcessAnchor(anchor);
+
+		//must be last because it destroys anchor content
+		if (stats_writer)
+			stats_writer->ProcessAnchor(std::move(anchor),
+				is_10X,
+				anchor_len_symbols,
+				target_len_symbols,
+				n_uniqe_targets,
+				tot_cnt,
+				n_uniqe_targets_before_filter,
+				tot_cnt_before_filter,
+				unique_samples
+			);
+	}
+};
+
 bool get_top_anchors(std::vector<std::unique_ptr<Bin>>& bins, AcceptedAnchors& anchor_filter, std::vector<uint64_t>& top_anchors) {
 	for (size_t bin_id = 0; bin_id < bins.size(); ++bin_id) {
 		uint64_t anchor;
@@ -954,8 +1016,9 @@ bool get_top_anchors(std::vector<std::unique_ptr<Bin>>& bins, AcceptedAnchors& a
 }
 
 std::unique_ptr<IAnchorProcessor> get_anchor_processor(
-	RunMode run_mode,
 	const std::string& outpath,
+	const std::string& dump_sample_anchor_target_count_txt,
+	const std::string dump_sample_anchor_target_count_binary,
 	RecFmt format,
 	uint8_t sample_id_size_bytes,
 	uint8_t barcode_size_bytes,
@@ -969,6 +1032,7 @@ std::unique_ptr<IAnchorProcessor> get_anchor_processor(
 	const std::string& sample_names,
 	bool without_alt_max,
 	bool with_effect_size_cts,
+	bool with_pval_asymp_opt,
 	bool compute_also_old_base_pvals,
 	bool is_removing_least_freq_targets_enabled,
 	uint32_t n_most_freq_targets,
@@ -976,16 +1040,19 @@ std::unique_ptr<IAnchorProcessor> get_anchor_processor(
 	int opt_num_inits,
 	int opt_num_iters,
 	size_t num_rand_cf,
+	size_t num_splits,
 	const std::string& cjs_out,
 	bool is_10X,
 	double max_pval_opt_for_Cjs,
 	CBCToCellType* cbc_to_cell_type,
 	Non10XSupervised* non_10X_supervised) {
-	switch (run_mode)
-	{
-	case RunMode::JustMerge:
-		return std::make_unique<SatcWriter>(
-			outpath,
+
+	std::unique_ptr<SatcDumpWriter> satc_dump_writer;
+
+	if (dump_sample_anchor_target_count_txt != "") {
+		satc_dump_writer = std::make_unique<SatcDumpWriter>(
+			dump_sample_anchor_target_count_txt,
+			format,
 			sample_id_size_bytes,
 			barcode_size_bytes,
 			anchor_size_bytes,
@@ -994,12 +1061,16 @@ std::unique_ptr<IAnchorProcessor> get_anchor_processor(
 			barcode_len_symbols,
 			anchor_len_symbols,
 			target_len_symbols,
-			gap_len_symbols
+			gap_len_symbols,
+			sample_names
 			);
-	case RunMode::JustMergeAndDump:
-		return std::make_unique<SatcDumpWriter>(
-			outpath,
-			format,
+	}
+
+	std::unique_ptr<SatcWriter> satc_writer;
+
+	if (dump_sample_anchor_target_count_binary != "") {
+		satc_writer = std::make_unique<SatcWriter>(
+			dump_sample_anchor_target_count_binary,
 			sample_id_size_bytes,
 			barcode_size_bytes,
 			anchor_size_bytes,
@@ -1008,34 +1079,36 @@ std::unique_ptr<IAnchorProcessor> get_anchor_processor(
 			barcode_len_symbols,
 			anchor_len_symbols,
 			target_len_symbols,
-			gap_len_symbols,
-			sample_names
+			gap_len_symbols
 			);
-	case RunMode::CalcStats:
-		return std::make_unique<StatsWriter>(
-			outpath,
-			without_alt_max,
-			with_effect_size_cts,
-			compute_also_old_base_pvals,
-			is_removing_least_freq_targets_enabled,
-			n_most_freq_targets,
-			opt_train_fraction,
-			opt_num_inits,
-			opt_num_iters,
-			num_rand_cf,
-			cjs_out,
-			is_10X,
-			anchor_len_symbols,
-			barcode_len_symbols,
-			sample_names,
-			max_pval_opt_for_Cjs,
-			cbc_to_cell_type,
-			non_10X_supervised);
-		break;
-	default:
-		std::cerr << "Error: this is not supported, mode " << to_string(run_mode) << "\n";
-		exit(1);
 	}
+
+	std::unique_ptr<StatsWriter> stats_writer = std::make_unique<StatsWriter>(
+		outpath,
+		without_alt_max,
+		with_effect_size_cts,
+		with_pval_asymp_opt,
+		compute_also_old_base_pvals,
+		is_removing_least_freq_targets_enabled,
+		n_most_freq_targets,
+		opt_train_fraction,
+		opt_num_inits,
+		opt_num_iters,
+		num_rand_cf,
+		num_splits,
+		cjs_out,
+		is_10X,
+		anchor_len_symbols,
+		barcode_len_symbols,
+		sample_names,
+		max_pval_opt_for_Cjs,
+		cbc_to_cell_type,
+		non_10X_supervised);
+
+	return std::make_unique<CombinedAnchorWriter>(
+		std::move(satc_dump_writer),
+		std::move(satc_writer),
+		std::move(stats_writer));
 }
 
 void run_non_10X(const Params& params) {
@@ -1080,8 +1153,10 @@ void run_non_10X(const Params& params) {
 			max_sample_id_size_bytes = bin_i_header.sample_id_size_bytes;
 	}
 
-	std::unique_ptr<IAnchorProcessor> anchor_processor = get_anchor_processor(params.run_mode,
+	std::unique_ptr<IAnchorProcessor> anchor_processor = get_anchor_processor(
 		params.outpath,
+		params.dump_sample_anchor_target_count_txt,
+		params.dump_sample_anchor_target_count_binary,
 		params.format,
 		max_sample_id_size_bytes,
 		bin0_header.barcode_size_bytes,
@@ -1095,6 +1170,7 @@ void run_non_10X(const Params& params) {
 		params.sample_names,
 		params.without_alt_max,
 		params.with_effect_size_cts,
+		params.with_pval_asymp_opt,
 		params.compute_also_old_base_pvals,
 		params.n_most_freq_targets_for_stats != 0,
 		params.n_most_freq_targets,
@@ -1102,6 +1178,7 @@ void run_non_10X(const Params& params) {
 		params.opt_num_inits,
 		params.opt_num_iters,
 		params.num_rand_cf,
+		params.num_splits,
 		params.cjs_out,
 		false,
 		params.max_pval_opt_for_Cjs,
@@ -1248,8 +1325,9 @@ void run_10X(const Params& params) {
 	timer.Start();
 
 	std::unique_ptr<IAnchorProcessor> anchor_processor = get_anchor_processor(
-		params.run_mode,
 		params.outpath,
+		params.dump_sample_anchor_target_count_txt,
+		params.dump_sample_anchor_target_count_binary,
 		params.format,
 		max_sample_id_size_bytes,
 		header.barcode_size_bytes,
@@ -1263,6 +1341,7 @@ void run_10X(const Params& params) {
 		params.sample_names,
 		params.without_alt_max,
 		params.with_effect_size_cts,
+		params.with_pval_asymp_opt,
 		params.compute_also_old_base_pvals,
 		false, //mkokot_TODO: add support to remove least freq targets from contignency table for 10X
 		params.n_most_freq_targets,
@@ -1270,6 +1349,7 @@ void run_10X(const Params& params) {
 		params.opt_num_inits,
 		params.opt_num_iters,
 		params.num_rand_cf,
+		params.num_splits,
 		params.cjs_out,
 		true,
 		params.max_pval_opt_for_Cjs,
diff --git a/src/sig_anch/sig_anch.cpp b/src/sig_anch/sig_anch.cpp
index f83b8cd..856ddf6 100644
--- a/src/sig_anch/sig_anch.cpp
+++ b/src/sig_anch/sig_anch.cpp
@@ -129,7 +129,7 @@ void correct_pvals_stream();
 bool usage()
 {
 	cerr << "sig_anch\n";
-	NOMAD_VER_PRINT(cerr);
+	SPLASH_VER_PRINT(cerr);
 	cerr << "Usage: sig_anch [options]" << endl
 		<< "Options:" << endl
 		<< "  --fdr_threshold <value>" << endl
diff --git a/src/nomad.py b/src/splash.py
similarity index 77%
rename from src/nomad.py
rename to src/splash.py
index 786038c..8e01be4 100755
--- a/src/nomad.py
+++ b/src/splash.py
@@ -18,11 +18,11 @@ def _split_lines(self, text, width):
         # this is the RawTextHelpFormatter._split_lines
         return argparse.HelpFormatter._split_lines(self, text, width)
 
-NOMAD_VERSION="2.0.0"
+SPLASH_VERSION="2.1.4"
 
 parser = argparse.ArgumentParser(
-                    prog = "nomad",
-                    description = "Welcome to NOMAD\nVersion: " + NOMAD_VERSION,
+                    prog = "splash",
+                    description = "Welcome to SPLASH\nVersion: " + SPLASH_VERSION,
                     #epilog = 'Text at the bottom of help',
                     #formatter_class=SmartFormatter
                     formatter_class=argparse.ArgumentDefaultsHelpFormatter
@@ -40,30 +40,39 @@ def _split_lines(self, text, width):
 
 group_filters_and_thresholds = parser.add_argument_group('Filters and thresholds')
 group_filters_and_thresholds.add_argument("--poly_ACGT_len", default=8, type=int, help="filter out all anchors containing poly(ACGT) of length at least <poly_ACGT_len> (0 means no filtering)")
+group_filters_and_thresholds.add_argument("--artifacts", default="", type=str, help="path to artifacts, each anchor containing artifact will be filtered out")
+group_filters_and_thresholds.add_argument("--dont_filter_illumina_adapters", default=False, action='store_true', help="if used anchors containing Illumina adapters will not be filtered out")
 group_filters_and_thresholds.add_argument("--anchor_unique_targets_threshold", default=1, type=int, help="filter out all anchors for which the number of unique targets is <= anchor_unique_targets_threshold")
 group_filters_and_thresholds.add_argument("--anchor_count_threshold", default=50, type=int, help="filter out all anchors for which the total count <= anchor_count_threshold")
 group_filters_and_thresholds.add_argument("--anchor_samples_threshold", default=1, type=int, help="filter out all anchors for which the number of unique samples is <= anchor_samples_threshold")
 group_filters_and_thresholds.add_argument("--anchor_sample_counts_threshold", default=5, type=int, help="filter out anchor from sample if its count in this sample is <= anchor_sample_counts_threshold")
 group_filters_and_thresholds.add_argument("--n_most_freq_targets_for_stats", default=0, type=int, help="use at most n_most_freq_targets_for_stats for each contignency table, 0 means use all")
 group_filters_and_thresholds.add_argument("--fdr_threshold", default=0.05, type=float, help="keep anchors having corrected p-val below this value")
+group_filters_and_thresholds.add_argument("--min_hamming_threshold", default=0, type=int, help="keep only anchors with a pair of targets that differ by >= min_hamming_threshold")
 
 group_additional_out = parser.add_argument_group('Additional output configuration')
 group_additional_out.add_argument("--dump_Cjs", default=False, action='store_true', help="output Cjs")
 group_additional_out.add_argument("--max_pval_opt_for_Cjs", default=0.10, type=float, help="dump only Cjs for anchors that have pval_opt <= max_pval_opt_for_Cjs")
-group_additional_out.add_argument("--n_most_freq_targets", default=2, type=int, help="number of most frequent tragets printed per each anchor in stats mode")
+group_additional_out.add_argument("--n_most_freq_targets", default=2, type=int, help="number of most frequent tragets printed per each anchor")
 group_additional_out.add_argument("--with_effect_size_cts", default=False, action='store_true', help="if set effect_size_cts will be computed")
+group_additional_out.add_argument("--with_pval_asymp_opt", default=False, action='store_true', help="if set pval_asymp_opt will be computed")
 group_additional_out.add_argument("--sample_name_to_id", default="sample_name_to_id.mapping.txt", type=str, help="file name with mapping sample name <-> sammpe id")
 group_additional_out.add_argument("--dump_sample_anchor_target_count_txt", default=False, action='store_true', help="if set contignency tables will be generated in text format")
 group_additional_out.add_argument("--dump_sample_anchor_target_count_binary", default=False, action='store_true', help="if set contignency tables will be generated in binary (satc) format, to convert to text format later satc_dump program may be used, it may take optionally maping from id to sample_name (--sample_names param)")
+group_additional_out.add_argument("--supervised_test_samplesheet", default="", help="if used script for finding/visualizing anchors with metadata-dependent variation will be run (forces --dump_sample_anchor_target_count_binary)")
+group_additional_out.add_argument("--supervised_test_anchor_sample_fraction_cutoff", default=0.4, type=float, help="the cutoff for the minimum fraction of samples for each anchor")
+group_additional_out.add_argument("--supervised_test_num_anchors", default=20000, type=int, help="maximum number of anchors to be tested example")
 
 group_tuning_stats = parser.add_argument_group('Tuning statistics computation')
 group_tuning_stats.add_argument("--opt_num_inits", default=10, type=int, help="the number of altMaximize runs")
 group_tuning_stats.add_argument("--opt_num_iters", default=50, type=int, help="the number of iteration in altMaximize")
 group_tuning_stats.add_argument("--num_rand_cf", default=50, type=int, help="the number of rand cf")
+group_tuning_stats.add_argument("--num_splits", default=1, type=int, help="the number of contingency table splits")
 group_tuning_stats.add_argument("--opt_train_fraction", default=0.25, type=float, help="in calc_stats mode use this fraction to create train X from contingency table")
+group_tuning_stats.add_argument("--without_alt_max", default=False, action='store_true', help="if set int alt max and related stats will not be computed")
 
 group_technical = parser.add_argument_group('Technical and performance-related')
-group_technical.add_argument("--bin_path", default="./", type=str, help="path to a directory where satc, satc_dump, satc_merge, sig_anch, kmc, kmc_tools binaries are (if any not found there nomad will check if installed and use installed)")
+group_technical.add_argument("--bin_path", default="bin", type=str, help="path to a directory where satc, satc_dump, satc_merge, sig_anch, kmc, kmc_tools binaries are (if any not found there splash will check if installed and use installed)")
 group_technical.add_argument("--tmp_dir", default="", type=str, help="path to a directory where temporary files will be stored")
 group_technical.add_argument("--n_threads_stage_1", default=4, type=int, help="number of threads for the first stage, too large value is not recomended because of intensive disk access here, but may be profitable if there is a lot of small size samples in the input")
 group_technical.add_argument("--n_threads_stage_1_internal", default=8, type=int, help="number of threads per each stage 1 thread")
@@ -90,6 +99,8 @@ def _split_lines(self, text, width):
 target_len = args.target_len
 gap_len = args.gap_len
 poly_ACGT_len=args.poly_ACGT_len
+artifacts = args.artifacts
+dont_filter_illumina_adapters = args.dont_filter_illumina_adapters
 anchor_unique_targets_threshold = args.anchor_unique_targets_threshold
 anchor_count_threshold = args.anchor_count_threshold
 anchor_samples_threshold = args.anchor_samples_threshold
@@ -99,15 +110,22 @@ def _split_lines(self, text, width):
 opt_num_inits=args.opt_num_inits
 opt_num_iters=args.opt_num_iters
 num_rand_cf=args.num_rand_cf
+num_splits=args.num_splits
 opt_train_fraction=args.opt_train_fraction
 kmc_use_RAM_only_mode = args.kmc_use_RAM_only_mode
 kmc_max_mem_GB = args.kmc_max_mem_GB
+without_alt_max = args.without_alt_max
 with_effect_size_cts = args.with_effect_size_cts
+with_pval_asymp_opt = args.with_pval_asymp_opt
 sample_name_to_id = args.sample_name_to_id
 dump_sample_anchor_target_count_txt = args.dump_sample_anchor_target_count_txt
 dump_sample_anchor_target_count_binary = args.dump_sample_anchor_target_count_binary
 pvals_correction_col_name = args.pvals_correction_col_name
 fdr_threshold = args.fdr_threshold
+min_hamming_threshold = args.min_hamming_threshold
+supervised_test_samplesheet = args.supervised_test_samplesheet
+supervised_test_anchor_sample_fraction_cutoff = args.supervised_test_anchor_sample_fraction_cutoff
+supervised_test_num_anchors = args.supervised_test_num_anchors
 outname_prefix=args.outname_prefix
 dump_Cjs=args.dump_Cjs
 max_pval_opt_for_Cjs=args.max_pval_opt_for_Cjs
@@ -116,15 +134,30 @@ def _split_lines(self, text, width):
 input_file=args.input_file
 tmp_dir=args.tmp_dir
 
+if supervised_test_samplesheet != "":
+    if run_10X:
+        print("Error: --supervised_test_samplesheet cannot be used with --run_10X in the current release")
+        sys.exit(1)
+    if dump_sample_anchor_target_count_binary == False:
+        print("Warning: --supervised_test_samplesheet forces --dump_sample_anchor_target_count_binary")
+        dump_sample_anchor_target_count_binary = True
+    if shutil.which("Rscript") is None:
+        print("Error: Rscript must be installed to run splash with --supervised_test_samplesheet")
+        sys.exit(1)
+
+    if n_most_freq_targets < 2:
+        print("Warning: --supervised_test_samplesheet requires at least 2 n_most_freq_targets, setting --n_most_freq_targets 2")
+        n_most_freq_targets = 2
+
 if tmp_dir == "":
-    tmp_dir="nomad-tmp-"+uuid.uuid4().hex
+    tmp_dir="splash-tmp-"+uuid.uuid4().hex
 
 
 def get_cur_time():    
     return strftime("%Y-%m-%d %H:%M:%S", localtime())
 
-print("Welcome to NOMAD")
-print("Version: ", NOMAD_VERSION)
+print("Welcome to SPLASH")
+print("Version: ", SPLASH_VERSION)
 print("Current time:", get_cur_time(), flush=True)
 
 
@@ -193,6 +226,8 @@ def get_prog_or_fail(prog):
 sig_anch=get_prog_or_fail("sig_anch")
 kmc=get_prog_or_fail("kmc")
 kmc_tools=get_prog_or_fail("kmc_tools")
+if supervised_test_samplesheet != "":
+    supervised_test = get_prog_or_fail("supervised_test.R")
 
 was_error = False
 
@@ -235,8 +270,8 @@ def run_cmd(cmd, out, err):
     for line in out.split('\n'):
         if line.startswith("Version:"):
             ver = line.split()[-1]
-            if ver != NOMAD_VERSION:
-                print(f"Error: {executable_path} version is {ver} while this script is {NOMAD_VERSION}. Please use the same version.")
+            if ver != SPLASH_VERSION:
+                print(f"Error: {executable_path} version is {ver} while this script is {SPLASH_VERSION}. Please use the same version.")
                 sys.exit(1)
             else:
                 version_is_OK=True
@@ -248,7 +283,7 @@ def run_cmd(cmd, out, err):
 if not os.path.exists(logs_dir):
     os.makedirs(logs_dir)
 
-with open(f"{logs_dir}/nomad-cmd.log", "w") as f:
+with open(f"{logs_dir}/splash-cmd.log", "w") as f:
     f.writelines(" ".join(sys.argv))
 
 inputs = []
@@ -276,6 +311,8 @@ def remove_kmc_output(path):
 print("Current time:", get_cur_time(), flush=True)
 
 def stage_1_task(id, input, out, err):
+    _artifacts_param = f"--artifacts {artifacts}" if artifacts != "" else ""
+    _dont_filter_illumina_adapters_param = "--dont_filter_illumina_adapters" if dont_filter_illumina_adapters else ""
     fname = input[0]
     sample_name = input[1]
     
@@ -300,7 +337,17 @@ def stage_1_task(id, input, out, err):
     if clean_up:
         remove_kmc_output(f"{tmp_dir}/{sample_name}")
     
-    cmd = f"{satc} --anchor_len {anchor_len} --target_len {target_len} --n_bins {n_bins} --anchor_sample_counts_threshold {anchor_sample_counts_threshold} --poly_ACGT_len {poly_ACGT_len} {tmp_dir}/{sample_name} {tmp_dir}/{sample_name}.sorted {id}"
+    cmd = f"{satc} \
+        --anchor_len {anchor_len} \
+        --target_len {target_len} \
+        --n_bins {n_bins} \
+        --anchor_sample_counts_threshold {anchor_sample_counts_threshold} \
+        --min_hamming_threshold {min_hamming_threshold} \
+        --poly_ACGT_len {poly_ACGT_len} \
+        {_artifacts_param} \
+        {_dont_filter_illumina_adapters_param} \
+        {tmp_dir}/{sample_name} \
+        {tmp_dir}/{sample_name}.sorted {id}"
     run_cmd(cmd, out, err)
     
     if clean_up:
@@ -367,10 +414,7 @@ def stage_1_worker(tid):
 # For each bin merge all samples
 # output: file {n_bins} files, where each have
 # its (sample, anchor, target, count)
-# this stage may be (and is intended to be) used to compute pvals
-# the current state of this may be enabled with --run_mode parameter
-# in the case when --run_mode is set to just_merge_and_dump
-# the format of the dump may be configured with --format parameter
+# this stage may be is used to compute pvals and other stats
 ###############################################################################
 
 print("Stage 1 done")
@@ -396,12 +440,26 @@ def stage_2_task(bin_id, out, err):
         for x in satc_merge_inputs:
             f.write(f"{x}\n")
     
+    _without_alt_max_param = "--without_alt_max" if without_alt_max else ""
     _with_effect_size_cts_param = "--with_effect_size_cts" if with_effect_size_cts else ""
+    _with_pval_asymp_opt_param = "--with_pval_asymp_opt" if with_pval_asymp_opt else ""
 
     _cjs_out_param = f"--cjs_out {Cjs_dir}/bin{bin_id}.cjs" if dump_Cjs else ""
 
+    _dump_sample_anchor_target_count_txt_param = ""
+    if dump_sample_anchor_target_count_txt:
+        dump_dir = f"{outname_prefix}_dumps"
+        _dump_sample_anchor_target_count_txt_param = f"--dump_sample_anchor_target_count_txt {dump_dir}/bin{bin_id}.satc.dump"
+
+    _dump_sample_anchor_target_count_binary_param = ""
+    if dump_sample_anchor_target_count_binary:
+        satc_dir = f"{outname_prefix}_satc"
+        _dump_sample_anchor_target_count_binary_param = f"--dump_sample_anchor_target_count_binary {satc_dir}/bin{bin_id}.satc"
+
     cmd=f"{satc_merge} \
+    {_without_alt_max_param} \
     {_with_effect_size_cts_param} \
+    {_with_pval_asymp_opt_param} \
     {_cjs_out_param} \
     --max_pval_opt_for_Cjs {max_pval_opt_for_Cjs} \
     --anchor_count_threshold {anchor_count_threshold} \
@@ -412,43 +470,16 @@ def stage_2_task(bin_id, out, err):
     --opt_num_inits {opt_num_inits} \
     --opt_num_iters {opt_num_iters} \
     --num_rand_cf {num_rand_cf} \
+    --num_splits {num_splits} \
     --opt_train_fraction {opt_train_fraction} \
-    --run_mode calc_stats \
+    {_dump_sample_anchor_target_count_txt_param} \
+    {_dump_sample_anchor_target_count_binary_param} \
     --sample_names {sample_name_to_id} \
+    --format satc \
     {anchor_list_param} \
     {tmp_dir}/{outname_prefix}.bin{bin_id}.stats.tsv {in_file_name}"
     run_cmd(cmd, out, err)
 
-    if dump_sample_anchor_target_count_txt:
-        dump_dir = f"{outname_prefix}_dumps"
-        if not os.path.exists(dump_dir):
-            os.makedirs(dump_dir)
-
-        cmd=f"{satc_merge} \
-        --anchor_count_threshold {anchor_count_threshold} \
-        --anchor_samples_threshold {anchor_samples_threshold} \
-        --anchor_unique_targets_threshold {anchor_unique_targets_threshold} \
-        --run_mode just_merge_and_dump \
-        --sample_names {sample_name_to_id} \
-        --format satc \
-        {anchor_list_param} \
-        {dump_dir}/bin{bin_id}.satc.dump {in_file_name}"
-        run_cmd(cmd, out, err)
-
-    if dump_sample_anchor_target_count_binary:
-        satc_dir = f"{outname_prefix}_satc"
-        if not os.path.exists(satc_dir):
-            os.makedirs(satc_dir)
-
-        cmd=f"{satc_merge} \
-        --anchor_count_threshold {anchor_count_threshold} \
-        --anchor_samples_threshold {anchor_samples_threshold} \
-        --anchor_unique_targets_threshold {anchor_unique_targets_threshold} \
-        --run_mode just_merge \
-        {anchor_list_param} \
-        {satc_dir}/bin{bin_id}.satc {in_file_name}"
-        run_cmd(cmd, out, err)
-
     if clean_up:
         os.remove(in_file_name)
         for f in satc_merge_inputs:
@@ -525,5 +556,26 @@ def stage_2_worker(tid):
     os.rmdir(tmp_dir)
 check_and_handle_error()
 print("Stage 3 done")
-print("NOMAD finished!")
+
+if supervised_test_samplesheet != "":
+    print("Starting supervised_test")
+    print("Current time:", get_cur_time(), flush=True)
+    cmd = f"Rscript {supervised_test} \
+        {n_bins} \
+        {sample_name_to_id} \
+        {satc_dump} \
+        {outname_prefix}_satc \
+        {outname_prefix}.after_correction.scores.tsv \
+        . \
+        {supervised_test_samplesheet} \
+        supervised_test \
+        {supervised_test_anchor_sample_fraction_cutoff} \
+        {supervised_test_num_anchors}"
+
+    stdoutfile = open(f"{logs_dir}/supervised_test.log", "w")
+    run_cmd(cmd, stdoutfile, stdoutfile)
+    stdoutfile.close()
+    print("supervised_test done")
+
+print("SPLASH finished!")
 print("Current time:", get_cur_time(), flush=True)
diff --git a/src/supervised_test/supervised_test.R b/src/supervised_test/supervised_test.R
new file mode 100644
index 0000000..d26a413
--- /dev/null
+++ b/src/supervised_test/supervised_test.R
@@ -0,0 +1,156 @@
+if (!require("data.table")) {
+  install.packages("data.table", dependencies = TRUE)
+  library(data.table)
+}
+
+if (!require("glmnet")) {
+  install.packages("glmnet", dependencies = TRUE)
+  library(glmnet)
+}
+
+if (!require("ggplot2")) {
+  install.packages("ggplot2", dependencies = TRUE)
+  library(ggplot2)
+}
+
+if (!require("gridExtra")) {
+  install.packages("gridExtra", dependencies = TRUE)
+  library(gridExtra)
+}
+
+# script only looks at the first column of the metadata file (for sample names) and the second column (for the assigned class/group/category/celltype to each sample)
+# metadata file must have column names and they could be anything, after reading in metadata file, the two columns are named as "sample_name" and "group"
+# An example metadata file:
+#Run	type
+#SRR8788980	carcinoma
+#SRR8788981	melanoma
+#SRR8788982	lymphoma
+#SRR8788983	carcinoma
+#SRR8657060	carcinoma
+
+
+######################################################
+############ INPUT PARAMETERS ########################
+
+args <- commandArgs(trailingOnly = TRUE)
+num_bins = as.numeric(args[1])
+sample_name_to_id_mapping = args[2]
+satc_dump_path = args[3]
+satc_dir = args[4]
+anchors_file = args[5]
+directory = args[6] # example: "/oak/stanford/groups/horence/Roozbeh/NOMAD_10x/runs/CCLE_all/"
+metadata_file = args[7] # example: "/oak/stanford/groups/horence/Roozbeh/NOMAD_10x/utility_files/CCLE_metadata_modified.tsv"
+run_name = args[8] # the output files and plots will be generated in ${directory}/${run_name}_supervised_metadata
+anchor_sample_fraction_cutoff = as.numeric(args[9]) # the cutoff for the minimum fraction of samples for each anchor, suggested 0.4
+num_anchors_for_supervised_test = as.integer(args[10]) # maximum number of anchors to be tested example, suggested 20000
+
+
+
+############## reading in metadata and sample_conversion files #########################
+metadata = fread(metadata_file)
+sample_name_id_conversion = fread(sample_name_to_id_mapping,header = FALSE)
+###############################################################
+
+################################################################
+##### select the anchors for the supervised GLM when no specific list of anchors is provided ################
+anchors = fread(anchors_file,select = c("anchor", "effect_size_bin", "number_nonzero_samples", "most_freq_target_1", "cnt_most_freq_target_1", "most_freq_target_2", "cnt_most_freq_target_2","avg_hamming_distance_max_target"))
+max_sample_num = max(anchors$number_nonzero_samples,na.rm = TRUE)
+anchors = anchors[number_nonzero_samples > (max_sample_num * anchor_sample_fraction_cutoff) & avg_hamming_distance_max_target > 5] ## keep only anchors that are in at least the given fraction of samples 
+setorder(anchors,-effect_size_bin)
+selected_anchors = anchors[1:min(num_anchors_for_supervised_test,nrow(anchors)),]
+
+################################################################
+################################################################
+system(paste("mkdir ", directory,"/",run_name,"_supervised_metadata",sep="")) # this is the directory for the supervised analysis files
+##################################################################################
+####### satc_dump for getting counts for the selected anchors ####################
+write.table(selected_anchors$anchor,paste(directory, "/",run_name,"_supervised_metadata/", "selected_anchors_GLMnet.txt",sep=""),sep="\t",row.names=FALSE,quote=FALSE,col.names = FALSE)
+for (counter in 0:num_bins-1) {
+  system(paste(satc_dump_path, paste(" --anchor_list ", paste(directory, "/",run_name,"_supervised_metadata/", "selected_anchors_GLMnet.txt ", sep = ""), satc_dir, "/bin", counter, ".satc ", directory, "/",run_name,"_supervised_metadata/", "satcOut",counter,".tsv ", sep = "")))
+}
+# now below I read in satcout files generated by satc_dump
+satcOut_files = list.files(path = paste(directory,"/",run_name,"_supervised_metadata/",sep=""), pattern = "satcOut")
+counts = data.table()
+for (counter in 1:length(satcOut_files)) {
+  counts = rbind(counts, fread(paste(directory,"/",run_name,"_supervised_metadata/",satcOut_files[counter], sep = ""))) 
+  print(counter)
+}
+setnames(counts,c("V1","V2","V3","V4"),c("sample_id","anchor","target","count"))
+###################################################################################
+###################################################################################
+
+################# merging counts with metadata #######################
+######################################################################
+metadata = metadata[,c(1,2)]
+names(metadata) = c("sample_name", "group")
+metadata = merge(metadata, sample_name_id_conversion, all.x = TRUE, all.y = FALSE, by.x="sample_name", by.y="V1")
+metadata = metadata[!is.na(V2)]
+counts = merge(counts, metadata, all.x = TRUE, all.y = FALSE, by.x = "sample_id", by.y = "V2")
+######################################################################
+######################################################################
+
+system(paste("mkdir ", directory,"/",run_name,"_supervised_metadata/plots",sep=""))
+GLM_output_dt = data.table() # the data table that will keep glm information for the selected anchors
+#############################################################
+####### perform GLM for each selected anchor ################
+for (counter in 1:nrow(selected_anchors)){
+  tryCatch({
+    anchor_interest = selected_anchors$anchor[counter]
+    counts_anchor = counts[anchor==anchor_interest] # target counts for the selected anchor 
+    counts_anchor[,target_count := sum(count),by=target] # compute total counts for each target
+    top_two_targets = counts_anchor[!duplicated(target)][order(-target_count)]$target[1:2] # find the top two targets
+    counts_anchor = counts_anchor[target %in% top_two_targets]
+    
+    #counts_anchor[,anchor_count_per_sample:=sum(count),by=paste(anchor,sample_name)]
+    counts_anchor[,anchor_count_per_sample:=sum(count),by=list(anchor,sample_name)] #mkokot_TODO: is this fix ok?
+    counts_anchor[,fraction:=count/anchor_count_per_sample,by=1:nrow(counts_anchor)] # compute target fraction per sample
+    counts_anchor_reshape = reshape(counts_anchor[,list(sample_name,target,fraction,count)], idvar="sample_name", timevar="target", direction="wide")
+    names(counts_anchor_reshape) = c("sample_name","target1_frac","target1_count","target2_frac","target2_count")
+    counts_anchor_reshape[is.na(target1_frac),target1_frac:=0]
+    counts_anchor_reshape[is.na(target2_frac),target2_frac:=0]
+    counts_anchor_reshape[is.na(target1_count),target1_count:=0]
+    counts_anchor_reshape[is.na(target2_count),target2_count:=0]
+    
+    sample_names = data.table(counts_anchor_reshape$sample_name)
+    sample_names = merge(sample_names,counts_anchor[!duplicated(sample_name),list(sample_name,group)],all.x=TRUE,all.y = FALSE,by.x="V1",by.y="sample_name")
+    sample_names[,class:=as.numeric(as.factor(group))] # class is the numeric conversion of the groups which is needed for the multinomnial GLM
+    
+    counts_anchor_reshape[,sample_name:=NULL]
+    
+    regression_formula = as.formula( "class ~ target1_frac + target2_frac")
+    
+    counts_anchor_reshape = cbind(counts_anchor_reshape,sample_names$group,sample_names$class)
+    colnames(counts_anchor_reshape)[5] = "group"
+    colnames(counts_anchor_reshape)[6] = "class"
+    counts_anchor_reshape = counts_anchor_reshape[!is.na(group)] 
+    counts_anchor_reshape[,num_per_group:=.N,by=group]
+    counts_anchor_reshape[,weight:=1/num_per_group]
+    counts_anchor_reshape = counts_anchor_reshape[num_per_group>9]
+    
+    # below if there is at least two groups for the anchor we perform GLMnet multinomial regression 
+    if (length(unique(counts_anchor_reshape$group))>1){
+      x_glmnet = model.matrix(regression_formula, counts_anchor_reshape)
+      glmnet_model = cv.glmnet(x_glmnet, as.factor(counts_anchor_reshape$group), family = c("multinomial"), intercept = FALSE, alpha = 1, nlambda = 50, nfolds = 5, weights = counts_anchor_reshape$weight)
+      glmnet_coeffs = coef(glmnet_model)
+      
+      largest_GLM_coefficient = max(unlist(lapply(lapply(glmnet_coeffs,abs),max))) # this will give us the GLM coefficient with the highest magnitude
+      # if the largest coefficient >1 then we plot target counts fractions and add it to the output table 
+      if (largest_GLM_coefficient > 1){
+        metadata_group_names = paste(names(glmnet_coeffs), collapse = "||") # collapse all the group names for this into a string
+        GLM_coefficients = paste(lapply(glmnet_coeffs, paste, collapse = ":"),collapse = "||") # collapse all the GLM coefficients into a string
+        GLM_output_dt_anchor = cbind(selected_anchors[anchor==anchor_interest], metadata_group_names, GLM_coefficients, largest_GLM_coefficient)
+        GLM_output_dt = rbind(GLM_output_dt, GLM_output_dt_anchor)
+        
+        #generate and write boxplot/scatterplot for the anchor
+        pdf(file=paste(directory, "/",run_name,"_supervised_metadata/plots/", anchor_interest, ".pdf", sep = ""), width = 8, height = 6)
+        g1 = ggplot(counts_anchor_reshape, aes(y = target1_frac, x = as.factor(group), color = as.factor(group))) + geom_boxplot() + theme_bw() +ggtitle(anchor_interest)
+        g2 = ggplot(counts_anchor_reshape, aes(x = target1_count, y = target2_count, shape = as.factor(group), color = as.factor(group))) + geom_point() + theme_bw()
+        print(grid.arrange(g1, g2, nrow = 2))
+        dev.off()
+      }
+    }
+  },error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
+}
+write.table(GLM_output_dt, paste(directory,"/",run_name,"_supervised_metadata/GLM_supervised_file.tsv", sep = ""), sep = "\t", row.names = FALSE, quote = FALSE)
+######################################################################################
+######################################################################################