Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update data URL #696

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion evaluation/benchmarks/bio/bio-align/genome-diff.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# bacteria), and any regions with less than 10 supporting reads.

# Requires: samtools, minimap2, bcftools
# Data: http://ndr.md/data/bio/R1.fastq.gz http://ndr.md/data/bio/R2.fastq.gz http://ndr.md/data/bio/ref.fa
# Data: atlas.cs.brown.edu/data/bio/R1.fastq.gz atlas.cs.brown.edu/data/bio/R2.fastq.gz atlas.cs.brown.edu/data/bio/ref.fa

# https://github.com/samtools/samtools/releases/latest
# https://github.com/lh3/minimap2
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/bio/bio-align/genquality.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# http://thegenomefactory.blogspot.com/2019/09/25-reasons-assemblies-dont-make-it-into.html

# Require: csvkit
# Data: http://ndr.md/data/bio/genbank.txt
# Data: atlas.cs.brown.edu/data/bio/genbank.txt

IN=./input/genbank.txt
OUT=./output/out.txt
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/bio/bio1/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ mkdir -p input
mkdir -p output
cd input
if [[ ! -f R1.fastq ]]; then
wget ndr.md/data/bio/{R1.fastq.gz,R2.fastq.gz,ref.fa}
wget atlas.cs.brown.edu/data/bio/{R1.fastq.gz,R2.fastq.gz,ref.fa}

gunzip R1.fastq.gz
gunzip R2.fastq.gz
Expand Down
4 changes: 2 additions & 2 deletions evaluation/benchmarks/max-temp/max-temp-preprocess.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#!/bin/bash

sed 's;^;http://ndr.md/data/noaa/;' |
sed 's;^;atlas.cs.brown.edu/data/noaa/;' |
sed 's;$;/;' |
xargs -r -n 1 curl -s |
grep gz |
tr -s ' \n' |
cut -d ' ' -f9 |
sed 's;^\(.*\)\(20[0-9][0-9]\).gz;\2/\1\2\.gz;' |
sed 's;^;http://ndr.md/data/noaa/;' |
sed 's;^;atlas.cs.brown.edu/data/noaa/;' |
xargs -n1 curl -s |
gunzip
2 changes: 1 addition & 1 deletion evaluation/benchmarks/max-temp/max-temp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

FROM=${FROM:-2015}
TO=${TO:-2015}
IN=${IN:-'http://ndr.md/data/noaa/'}
IN=${IN:-'atlas.cs.brown.edu/data/noaa/'}
fetch=${fetch:-"curl -s"}

seq $FROM $TO |
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/max-temp/temp-analytics.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

FROM=${FROM:-2015}
TO=${TO:-2015}
IN=${IN:-'http://ndr.md/data/noaa/'}
IN=${IN:-'atlas.cs.brown.edu/data/noaa/'}
fetch=${fetch:-"curl -s"}

data_file=temperatures.txt
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/nlp/input/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ setup_dataset() {
cd pg
if [[ "$1" == "--full" ]]; then
echo 'N.b.: download/extraction will take about 10min'
wget ndr.md/data/pg.tar.xz
wget atlas.cs.brown.edu/data/pg.tar.xz # FIXME: moving to PG soon
if [ $? -ne 0 ]; then
cat <<-'EOF' | sed 's/^ *//'
Downloading input dataset failed, thus need to manually rsync all books from project gutenberg:
Expand Down
8 changes: 4 additions & 4 deletions evaluation/benchmarks/oneliners/input/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ setup_dataset() {
fi

if [ ! -f ./1M.txt ]; then
curl -sf 'http://ndr.md/data/dummy/1M.txt' > 1M.txt
curl -sf 'atlas.cs.brown.edu/data/dummy/1M.txt' > 1M.txt
if [ $? -ne 0 ]; then
echo 'cannot find 1M.txt -- please contact the developers of pash'
exit 1
Expand All @@ -51,7 +51,7 @@ setup_dataset() {
fi

if [ ! -f ./1G.txt ]; then
curl -sf 'http://ndr.md/data/dummy/1G.txt' > 1G.txt
curl -sf 'atlas.cs.brown.edu/data/dummy/1G.txt' > 1G.txt
if [ $? -ne 0 ]; then
echo 'cannot find 1G.txt -- please contact the developers of pash'
exit 1
Expand All @@ -61,7 +61,7 @@ setup_dataset() {

# download wamerican-insane dictionary and sort according to machine
if [ ! -f ./dict.txt ]; then
curl -sf 'http://ndr.md/data/dummy/dict.txt' | sort > dict.txt
curl -sf 'atlas.cs.brown.edu/data/dummy/dict.txt' | sort > dict.txt
if [ $? -ne 0 ]; then
echo 'cannot find dict.txt -- please contact the developers of pash'
exit 1
Expand All @@ -70,7 +70,7 @@ setup_dataset() {
fi

if [ ! -f ./all_cmds.txt ]; then
curl -sf 'http://ndr.md/data/dummy/all_cmds.txt' > all_cmds.txt
curl -sf 'atlas.cs.brown.edu/data/dummy/all_cmds.txt' > all_cmds.txt
if [ $? -ne 0 ]; then
# This should be OK for tests, no need for abort
ls /usr/bin/* > all_cmds.txt
Expand Down
3 changes: 1 addition & 2 deletions evaluation/benchmarks/web-index/input/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ setup_dataset() {
wget $wiki_archive || eexit "cannot fetch wikipedia"
7za x wikipedia-en-html.tar.7z
tar -xvf wikipedia-en-html.tar
wget http://ndr.md/data/wikipedia/index.txt # || eexit "cannot fetch wiki indices"
# It is actually OK if we don't have this index since we download the 500/1000 below
wget atlas.cs.brown.edu/data/wikipedia/index.txt # FIXME: we download index below?
fi

if [ "$1" = "--small" ]; then
Expand Down
4 changes: 2 additions & 2 deletions evaluation/intro/input/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ cd $(dirname $0)


if [ ! -f ./100M.txt ]; then
curl -sf --connect-timeout 10 'ndr.md/data/dummy/100M.txt' > 100M.txt
curl -sf --connect-timeout 10 'atlas.cs.brown.edu/data/dummy/100M.txt' > 100M.txt
if [ $? -ne 0 ]; then
# Pipe curl through tac (twice) in order to consume all the output from curl.
# This way, curl can write the whole page and not emit an error code.
Expand All @@ -23,7 +23,7 @@ if [ ! -f ./100M.txt ]; then
fi

if [ ! -f ./words ]; then
curl -sf --connect-timeout 10 'http://ndr.md/data/dummy/words' > words
curl -sf --connect-timeout 10 'atlas.cs.brown.edu/data/dummy/words' > words
if [ $? -ne 0 ]; then
curl -sf 'https://zenodo.org/record/7650885/files/words' > words
if [ $? -ne 0 ]; then
Expand Down
2 changes: 1 addition & 1 deletion evaluation/other/more-scripts/page-count.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

# Require: libimage-exiftool-perl, bc
# Data:
# http://ndr.md/data/dummy/large.pdf
# atlas.cs.brown.edu/data/dummy/large.pdf
# More data:
# https://arxiv.org/help/bulk_data

Expand Down
2 changes: 1 addition & 1 deletion evaluation/other/more-scripts/spell.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# TODO: `groff is an interesting "pure", whose wrapper only needs split input
# TODO: files carefully.

# Data: http://ndr.md/data/dummy/ronn.1
# Data: atlas.cs.brown.edu/data/dummy/ronn.1
# dict depends on the system (and has to be sorted), so we assume it exists
dict=./input/dict.txt

Expand Down
6 changes: 3 additions & 3 deletions evaluation/tests/input/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ esac
[ "$1" = "-c" ] && rm-files 1M.txt all_cmds.txt words sorted_words 10M.txt

if [ ! -f ./1M.txt ]; then
curl -sf --connect-timeout 10 'http://ndr.md/data/dummy/1M.txt' > 1M.txt
curl -sf --connect-timeout 10 'atlas.cs.brown.edu/data/dummy/1M.txt' > 1M.txt
if [ $? -ne 0 ]; then
curl -f 'https://zenodo.org/record/7650885/files/1M.txt' > 1M.txt
if [ $? -ne 0 ]; then
Expand All @@ -29,7 +29,7 @@ fi

if [ ! -f ./all_cmds.txt ]; then
if [ "$(hostname)" = "deathstar" ]; then
curl -sf --connect-timeout 10 'http://ndr.md/data/dummy/all_cmds.txt' > all_cmds.txt
curl -sf --connect-timeout 10 'atlas.cs.brown.edu/data/dummy/all_cmds.txt' > all_cmds.txt
if [ $? -ne 0 ]; then
curl -f 'https://zenodo.org/record/7650885/files/all_cmds.txt' > all_cmds.txt || eexit "all_cmds not found"
fi
Expand All @@ -40,7 +40,7 @@ if [ ! -f ./all_cmds.txt ]; then
fi

if [ ! -f ./words ]; then
curl -sf --connect-timeout 10 'http://ndr.md/data/dummy/words' > words
curl -sf --connect-timeout 10 'atlas.cs.brown.edu/data/dummy/words' > words
if [ $? -ne 0 ]; then
curl -f 'https://zenodo.org/record/7650885/files/words' > words
if [ $? -ne 0 ]; then
Expand Down
6 changes: 3 additions & 3 deletions evaluation/tests/sed-test.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
cat $PASH_TOP/evaluation/tests/input/1M.txt |
sed 's;^d;da;' |
sed 's;^;http://ndr.md/data/noaa/;' |
sed 's;^;atlas.cs.brown.edu/data/noaa/;' |
sed 's;$;/;' |
sed 's;^\(.*\)\(20[0-9][0-9]\).gz;\2/\1\2\.gz;' |
sed 's;^;http://ndr.md/data/noaa/;' |
sed 's;^;atlas.cs.brown.edu/data/noaa/;' |
sed "s#^#$WIKI#" |
sed s/\$/'0s'/ |
sed 1d |
sed 4d |
sed "\$d"
sed "\$d"
Loading