From 42e2d6a1e84c246c105ca669dc76985ec65792cb Mon Sep 17 00:00:00 2001 From: Nikos Vasilakis Date: Wed, 8 Nov 2023 21:51:37 -0500 Subject: [PATCH 1/2] Update data URL --- evaluation/benchmarks/bio/bio-align/genome-diff.sh | 2 +- evaluation/benchmarks/bio/bio-align/genquality.sh | 2 +- evaluation/benchmarks/bio/bio1/setup.sh | 2 +- evaluation/benchmarks/max-temp/max-temp-preprocess.sh | 4 ++-- evaluation/benchmarks/max-temp/max-temp.sh | 2 +- evaluation/benchmarks/max-temp/temp-analytics.sh | 2 +- evaluation/benchmarks/nlp/input/setup.sh | 2 +- evaluation/benchmarks/oneliners/input/setup.sh | 8 ++++---- evaluation/benchmarks/web-index/input/setup.sh | 3 +-- evaluation/intro/input/setup.sh | 4 ++-- evaluation/other/more-scripts/page-count.sh | 2 +- evaluation/other/more-scripts/spell.sh | 2 +- evaluation/tests/input/setup.sh | 6 +++--- evaluation/tests/sed-test.sh | 6 +++--- 14 files changed, 23 insertions(+), 24 deletions(-) diff --git a/evaluation/benchmarks/bio/bio-align/genome-diff.sh b/evaluation/benchmarks/bio/bio-align/genome-diff.sh index a269f9e95..c82061797 100755 --- a/evaluation/benchmarks/bio/bio-align/genome-diff.sh +++ b/evaluation/benchmarks/bio/bio-align/genome-diff.sh @@ -11,7 +11,7 @@ # bacteria), and any regions with less than 10 supporting reads. # Requires: samtools, minimap2, bcftools -# Data: http://ndr.md/data/bio/R1.fastq.gz http://ndr.md/data/bio/R2.fastq.gz http://ndr.md/data/bio/ref.fa +# Data: atlas-group.cs.brown.edu/data/bio/R1.fastq.gz atlas-group.cs.brown.edu/data/bio/R2.fastq.gz atlas-group.cs.brown.edu/data/bio/ref.fa # https://github.com/samtools/samtools/releases/latest # https://github.com/lh3/minimap2 diff --git a/evaluation/benchmarks/bio/bio-align/genquality.sh b/evaluation/benchmarks/bio/bio-align/genquality.sh index 64c777fdd..62c731960 100755 --- a/evaluation/benchmarks/bio/bio-align/genquality.sh +++ b/evaluation/benchmarks/bio/bio-align/genquality.sh @@ -6,7 +6,7 @@ # http://thegenomefactory.blogspot.com/2019/09/25-reasons-assemblies-dont-make-it-into.html # Require: csvkit -# Data: http://ndr.md/data/bio/genbank.txt +# Data: atlas-group.cs.brown.edu/data/bio/genbank.txt IN=./input/genbank.txt OUT=./output/out.txt diff --git a/evaluation/benchmarks/bio/bio1/setup.sh b/evaluation/benchmarks/bio/bio1/setup.sh index 40bdd47cb..9c2bb1629 100644 --- a/evaluation/benchmarks/bio/bio1/setup.sh +++ b/evaluation/benchmarks/bio/bio1/setup.sh @@ -8,7 +8,7 @@ mkdir -p input mkdir -p output cd input if [[ ! -f R1.fastq ]]; then - wget ndr.md/data/bio/{R1.fastq.gz,R2.fastq.gz,ref.fa} + wget atlas-group.cs.brown.edu/data/bio/{R1.fastq.gz,R2.fastq.gz,ref.fa} gunzip R1.fastq.gz gunzip R2.fastq.gz diff --git a/evaluation/benchmarks/max-temp/max-temp-preprocess.sh b/evaluation/benchmarks/max-temp/max-temp-preprocess.sh index e3d4b98c5..8d0719049 100755 --- a/evaluation/benchmarks/max-temp/max-temp-preprocess.sh +++ b/evaluation/benchmarks/max-temp/max-temp-preprocess.sh @@ -1,12 +1,12 @@ #!/bin/bash -sed 's;^;http://ndr.md/data/noaa/;' | +sed 's;^;atlas-group.cs.brown.edu/data/noaa/;' | sed 's;$;/;' | xargs -r -n 1 curl -s | grep gz | tr -s ' \n' | cut -d ' ' -f9 | sed 's;^\(.*\)\(20[0-9][0-9]\).gz;\2/\1\2\.gz;' | - sed 's;^;http://ndr.md/data/noaa/;' | + sed 's;^;atlas-group.cs.brown.edu/data/noaa/;' | xargs -n1 curl -s | gunzip diff --git a/evaluation/benchmarks/max-temp/max-temp.sh b/evaluation/benchmarks/max-temp/max-temp.sh index b0c18aaa8..b74f72b10 100755 --- a/evaluation/benchmarks/max-temp/max-temp.sh +++ b/evaluation/benchmarks/max-temp/max-temp.sh @@ -2,7 +2,7 @@ FROM=${FROM:-2015} TO=${TO:-2015} -IN=${IN:-'http://ndr.md/data/noaa/'} +IN=${IN:-'atlas-group.cs.brown.edu/data/noaa/'} fetch=${fetch:-"curl -s"} seq $FROM $TO | diff --git a/evaluation/benchmarks/max-temp/temp-analytics.sh b/evaluation/benchmarks/max-temp/temp-analytics.sh index 319a8f0e4..a1399fa7d 100755 --- a/evaluation/benchmarks/max-temp/temp-analytics.sh +++ b/evaluation/benchmarks/max-temp/temp-analytics.sh @@ -2,7 +2,7 @@ FROM=${FROM:-2015} TO=${TO:-2015} -IN=${IN:-'http://ndr.md/data/noaa/'} +IN=${IN:-'atlas-group.cs.brown.edu/data/noaa/'} fetch=${fetch:-"curl -s"} data_file=temperatures.txt diff --git a/evaluation/benchmarks/nlp/input/setup.sh b/evaluation/benchmarks/nlp/input/setup.sh index 5486b39f2..a26a9cf19 100755 --- a/evaluation/benchmarks/nlp/input/setup.sh +++ b/evaluation/benchmarks/nlp/input/setup.sh @@ -20,7 +20,7 @@ setup_dataset() { cd pg if [[ "$1" == "--full" ]]; then echo 'N.b.: download/extraction will take about 10min' - wget ndr.md/data/pg.tar.xz + wget atlas-group.cs.brown.edu/data/pg.tar.xz # FIXME: moving to PG soon if [ $? -ne 0 ]; then cat <<-'EOF' | sed 's/^ *//' Downloading input dataset failed, thus need to manually rsync all books from project gutenberg: diff --git a/evaluation/benchmarks/oneliners/input/setup.sh b/evaluation/benchmarks/oneliners/input/setup.sh index 96388980d..eb8a00317 100755 --- a/evaluation/benchmarks/oneliners/input/setup.sh +++ b/evaluation/benchmarks/oneliners/input/setup.sh @@ -26,7 +26,7 @@ setup_dataset() { fi if [ ! -f ./1M.txt ]; then - curl -sf 'http://ndr.md/data/dummy/1M.txt' > 1M.txt + curl -sf 'atlas-group.cs.brown.edu/data/dummy/1M.txt' > 1M.txt if [ $? -ne 0 ]; then echo 'cannot find 1M.txt -- please contact the developers of pash' exit 1 @@ -51,7 +51,7 @@ setup_dataset() { fi if [ ! -f ./1G.txt ]; then - curl -sf 'http://ndr.md/data/dummy/1G.txt' > 1G.txt + curl -sf 'atlas-group.cs.brown.edu/data/dummy/1G.txt' > 1G.txt if [ $? -ne 0 ]; then echo 'cannot find 1G.txt -- please contact the developers of pash' exit 1 @@ -61,7 +61,7 @@ setup_dataset() { # download wamerican-insane dictionary and sort according to machine if [ ! -f ./dict.txt ]; then - curl -sf 'http://ndr.md/data/dummy/dict.txt' | sort > dict.txt + curl -sf 'atlas-group.cs.brown.edu/data/dummy/dict.txt' | sort > dict.txt if [ $? -ne 0 ]; then echo 'cannot find dict.txt -- please contact the developers of pash' exit 1 @@ -70,7 +70,7 @@ setup_dataset() { fi if [ ! -f ./all_cmds.txt ]; then - curl -sf 'http://ndr.md/data/dummy/all_cmds.txt' > all_cmds.txt + curl -sf 'atlas-group.cs.brown.edu/data/dummy/all_cmds.txt' > all_cmds.txt if [ $? -ne 0 ]; then # This should be OK for tests, no need for abort ls /usr/bin/* > all_cmds.txt diff --git a/evaluation/benchmarks/web-index/input/setup.sh b/evaluation/benchmarks/web-index/input/setup.sh index 72a4fd8f9..79a77276a 100755 --- a/evaluation/benchmarks/web-index/input/setup.sh +++ b/evaluation/benchmarks/web-index/input/setup.sh @@ -17,8 +17,7 @@ setup_dataset() { wget $wiki_archive || eexit "cannot fetch wikipedia" 7za x wikipedia-en-html.tar.7z tar -xvf wikipedia-en-html.tar - wget http://ndr.md/data/wikipedia/index.txt # || eexit "cannot fetch wiki indices" - # It is actually OK if we don't have this index since we download the 500/1000 below + wget atlas-group.cs.brown.edu/data/wikipedia/index.txt # FIXME: we download index below? fi if [ "$1" = "--small" ]; then diff --git a/evaluation/intro/input/setup.sh b/evaluation/intro/input/setup.sh index e984d53db..c2eaa684d 100755 --- a/evaluation/intro/input/setup.sh +++ b/evaluation/intro/input/setup.sh @@ -8,7 +8,7 @@ cd $(dirname $0) if [ ! -f ./100M.txt ]; then - curl -sf --connect-timeout 10 'ndr.md/data/dummy/100M.txt' > 100M.txt + curl -sf --connect-timeout 10 'atlas-group.cs.brown.edu/data/dummy/100M.txt' > 100M.txt if [ $? -ne 0 ]; then # Pipe curl through tac (twice) in order to consume all the output from curl. # This way, curl can write the whole page and not emit an error code. @@ -23,7 +23,7 @@ if [ ! -f ./100M.txt ]; then fi if [ ! -f ./words ]; then - curl -sf --connect-timeout 10 'http://ndr.md/data/dummy/words' > words + curl -sf --connect-timeout 10 'atlas-group.cs.brown.edu/data/dummy/words' > words if [ $? -ne 0 ]; then curl -sf 'https://zenodo.org/record/7650885/files/words' > words if [ $? -ne 0 ]; then diff --git a/evaluation/other/more-scripts/page-count.sh b/evaluation/other/more-scripts/page-count.sh index b4a3326e5..c4d89ecfd 100755 --- a/evaluation/other/more-scripts/page-count.sh +++ b/evaluation/other/more-scripts/page-count.sh @@ -5,7 +5,7 @@ # Require: libimage-exiftool-perl, bc # Data: -# http://ndr.md/data/dummy/large.pdf +# atlas-group.cs.brown.edu/data/large.pdf # More data: # https://arxiv.org/help/bulk_data diff --git a/evaluation/other/more-scripts/spell.sh b/evaluation/other/more-scripts/spell.sh index 1d4a9f330..9fd5e7384 100755 --- a/evaluation/other/more-scripts/spell.sh +++ b/evaluation/other/more-scripts/spell.sh @@ -6,7 +6,7 @@ # TODO: `groff is an interesting "pure", whose wrapper only needs split input # TODO: files carefully. -# Data: http://ndr.md/data/dummy/ronn.1 +# Data: atlas-group.cs.brown.edu/data/dummy/ronn.1 # dict depends on the system (and has to be sorted), so we assume it exists dict=./input/dict.txt diff --git a/evaluation/tests/input/setup.sh b/evaluation/tests/input/setup.sh index ed14d8955..ccc6712fe 100755 --- a/evaluation/tests/input/setup.sh +++ b/evaluation/tests/input/setup.sh @@ -16,7 +16,7 @@ esac [ "$1" = "-c" ] && rm-files 1M.txt all_cmds.txt words sorted_words 10M.txt if [ ! -f ./1M.txt ]; then - curl -sf --connect-timeout 10 'http://ndr.md/data/dummy/1M.txt' > 1M.txt + curl -sf --connect-timeout 10 'atlas-group.cs.brown.edu/data/dummy/1M.txt' > 1M.txt if [ $? -ne 0 ]; then curl -f 'https://zenodo.org/record/7650885/files/1M.txt' > 1M.txt if [ $? -ne 0 ]; then @@ -29,7 +29,7 @@ fi if [ ! -f ./all_cmds.txt ]; then if [ "$(hostname)" = "deathstar" ]; then - curl -sf --connect-timeout 10 'http://ndr.md/data/dummy/all_cmds.txt' > all_cmds.txt + curl -sf --connect-timeout 10 'atlas-group.cs.brown.edu/data/dummy/all_cmds.txt' > all_cmds.txt if [ $? -ne 0 ]; then curl -f 'https://zenodo.org/record/7650885/files/all_cmds.txt' > all_cmds.txt || eexit "all_cmds not found" fi @@ -40,7 +40,7 @@ if [ ! -f ./all_cmds.txt ]; then fi if [ ! -f ./words ]; then - curl -sf --connect-timeout 10 'http://ndr.md/data/dummy/words' > words + curl -sf --connect-timeout 10 'atlas-group.cs.brown.edu/data/dummy/words' > words if [ $? -ne 0 ]; then curl -f 'https://zenodo.org/record/7650885/files/words' > words if [ $? -ne 0 ]; then diff --git a/evaluation/tests/sed-test.sh b/evaluation/tests/sed-test.sh index f5ba0ac85..38d1cc855 100644 --- a/evaluation/tests/sed-test.sh +++ b/evaluation/tests/sed-test.sh @@ -1,11 +1,11 @@ cat $PASH_TOP/evaluation/tests/input/1M.txt | sed 's;^d;da;' | - sed 's;^;http://ndr.md/data/noaa/;' | + sed 's;^;atlas-group.cs.brown.edu/data/noaa/;' | sed 's;$;/;' | sed 's;^\(.*\)\(20[0-9][0-9]\).gz;\2/\1\2\.gz;' | - sed 's;^;http://ndr.md/data/noaa/;' | + sed 's;^;atlas-group.cs.brown.edu/data/noaa/;' | sed "s#^#$WIKI#" | sed s/\$/'0s'/ | sed 1d | sed 4d | - sed "\$d" \ No newline at end of file + sed "\$d" From bfc02a0d749100f9ff2768d02512b989e42946bf Mon Sep 17 00:00:00 2001 From: dileventi Date: Tue, 12 Dec 2023 23:23:22 +0200 Subject: [PATCH 2/2] atlas-group to atlas fix --- evaluation/benchmarks/bio/bio-align/genome-diff.sh | 2 +- evaluation/benchmarks/bio/bio-align/genquality.sh | 2 +- evaluation/benchmarks/bio/bio1/setup.sh | 2 +- evaluation/benchmarks/max-temp/max-temp-preprocess.sh | 4 ++-- evaluation/benchmarks/max-temp/max-temp.sh | 2 +- evaluation/benchmarks/max-temp/temp-analytics.sh | 2 +- evaluation/benchmarks/nlp/input/setup.sh | 2 +- evaluation/benchmarks/oneliners/input/setup.sh | 8 ++++---- evaluation/intro/input/setup.sh | 4 ++-- evaluation/other/more-scripts/page-count.sh | 2 +- evaluation/other/more-scripts/spell.sh | 2 +- evaluation/tests/input/setup.sh | 6 +++--- evaluation/tests/sed-test.sh | 4 ++-- 13 files changed, 21 insertions(+), 21 deletions(-) diff --git a/evaluation/benchmarks/bio/bio-align/genome-diff.sh b/evaluation/benchmarks/bio/bio-align/genome-diff.sh index c82061797..7d1d2537c 100755 --- a/evaluation/benchmarks/bio/bio-align/genome-diff.sh +++ b/evaluation/benchmarks/bio/bio-align/genome-diff.sh @@ -11,7 +11,7 @@ # bacteria), and any regions with less than 10 supporting reads. # Requires: samtools, minimap2, bcftools -# Data: atlas-group.cs.brown.edu/data/bio/R1.fastq.gz atlas-group.cs.brown.edu/data/bio/R2.fastq.gz atlas-group.cs.brown.edu/data/bio/ref.fa +# Data: atlas.cs.brown.edu/data/bio/R1.fastq.gz atlas.cs.brown.edu/data/bio/R2.fastq.gz atlas.cs.brown.edu/data/bio/ref.fa # https://github.com/samtools/samtools/releases/latest # https://github.com/lh3/minimap2 diff --git a/evaluation/benchmarks/bio/bio-align/genquality.sh b/evaluation/benchmarks/bio/bio-align/genquality.sh index 62c731960..4f82b2356 100755 --- a/evaluation/benchmarks/bio/bio-align/genquality.sh +++ b/evaluation/benchmarks/bio/bio-align/genquality.sh @@ -6,7 +6,7 @@ # http://thegenomefactory.blogspot.com/2019/09/25-reasons-assemblies-dont-make-it-into.html # Require: csvkit -# Data: atlas-group.cs.brown.edu/data/bio/genbank.txt +# Data: atlas.cs.brown.edu/data/bio/genbank.txt IN=./input/genbank.txt OUT=./output/out.txt diff --git a/evaluation/benchmarks/bio/bio1/setup.sh b/evaluation/benchmarks/bio/bio1/setup.sh index 9c2bb1629..3d75650ce 100644 --- a/evaluation/benchmarks/bio/bio1/setup.sh +++ b/evaluation/benchmarks/bio/bio1/setup.sh @@ -8,7 +8,7 @@ mkdir -p input mkdir -p output cd input if [[ ! -f R1.fastq ]]; then - wget atlas-group.cs.brown.edu/data/bio/{R1.fastq.gz,R2.fastq.gz,ref.fa} + wget atlas.cs.brown.edu/data/bio/{R1.fastq.gz,R2.fastq.gz,ref.fa} gunzip R1.fastq.gz gunzip R2.fastq.gz diff --git a/evaluation/benchmarks/max-temp/max-temp-preprocess.sh b/evaluation/benchmarks/max-temp/max-temp-preprocess.sh index 8d0719049..c5f78fe58 100755 --- a/evaluation/benchmarks/max-temp/max-temp-preprocess.sh +++ b/evaluation/benchmarks/max-temp/max-temp-preprocess.sh @@ -1,12 +1,12 @@ #!/bin/bash -sed 's;^;atlas-group.cs.brown.edu/data/noaa/;' | +sed 's;^;atlas.cs.brown.edu/data/noaa/;' | sed 's;$;/;' | xargs -r -n 1 curl -s | grep gz | tr -s ' \n' | cut -d ' ' -f9 | sed 's;^\(.*\)\(20[0-9][0-9]\).gz;\2/\1\2\.gz;' | - sed 's;^;atlas-group.cs.brown.edu/data/noaa/;' | + sed 's;^;atlas.cs.brown.edu/data/noaa/;' | xargs -n1 curl -s | gunzip diff --git a/evaluation/benchmarks/max-temp/max-temp.sh b/evaluation/benchmarks/max-temp/max-temp.sh index b74f72b10..539db1ff2 100755 --- a/evaluation/benchmarks/max-temp/max-temp.sh +++ b/evaluation/benchmarks/max-temp/max-temp.sh @@ -2,7 +2,7 @@ FROM=${FROM:-2015} TO=${TO:-2015} -IN=${IN:-'atlas-group.cs.brown.edu/data/noaa/'} +IN=${IN:-'atlas.cs.brown.edu/data/noaa/'} fetch=${fetch:-"curl -s"} seq $FROM $TO | diff --git a/evaluation/benchmarks/max-temp/temp-analytics.sh b/evaluation/benchmarks/max-temp/temp-analytics.sh index a1399fa7d..542a85639 100755 --- a/evaluation/benchmarks/max-temp/temp-analytics.sh +++ b/evaluation/benchmarks/max-temp/temp-analytics.sh @@ -2,7 +2,7 @@ FROM=${FROM:-2015} TO=${TO:-2015} -IN=${IN:-'atlas-group.cs.brown.edu/data/noaa/'} +IN=${IN:-'atlas.cs.brown.edu/data/noaa/'} fetch=${fetch:-"curl -s"} data_file=temperatures.txt diff --git a/evaluation/benchmarks/nlp/input/setup.sh b/evaluation/benchmarks/nlp/input/setup.sh index a26a9cf19..143020bea 100755 --- a/evaluation/benchmarks/nlp/input/setup.sh +++ b/evaluation/benchmarks/nlp/input/setup.sh @@ -20,7 +20,7 @@ setup_dataset() { cd pg if [[ "$1" == "--full" ]]; then echo 'N.b.: download/extraction will take about 10min' - wget atlas-group.cs.brown.edu/data/pg.tar.xz # FIXME: moving to PG soon + wget atlas.cs.brown.edu/data/pg.tar.xz # FIXME: moving to PG soon if [ $? -ne 0 ]; then cat <<-'EOF' | sed 's/^ *//' Downloading input dataset failed, thus need to manually rsync all books from project gutenberg: diff --git a/evaluation/benchmarks/oneliners/input/setup.sh b/evaluation/benchmarks/oneliners/input/setup.sh index eb8a00317..0f460b5e0 100755 --- a/evaluation/benchmarks/oneliners/input/setup.sh +++ b/evaluation/benchmarks/oneliners/input/setup.sh @@ -26,7 +26,7 @@ setup_dataset() { fi if [ ! -f ./1M.txt ]; then - curl -sf 'atlas-group.cs.brown.edu/data/dummy/1M.txt' > 1M.txt + curl -sf 'atlas.cs.brown.edu/data/dummy/1M.txt' > 1M.txt if [ $? -ne 0 ]; then echo 'cannot find 1M.txt -- please contact the developers of pash' exit 1 @@ -51,7 +51,7 @@ setup_dataset() { fi if [ ! -f ./1G.txt ]; then - curl -sf 'atlas-group.cs.brown.edu/data/dummy/1G.txt' > 1G.txt + curl -sf 'atlas.cs.brown.edu/data/dummy/1G.txt' > 1G.txt if [ $? -ne 0 ]; then echo 'cannot find 1G.txt -- please contact the developers of pash' exit 1 @@ -61,7 +61,7 @@ setup_dataset() { # download wamerican-insane dictionary and sort according to machine if [ ! -f ./dict.txt ]; then - curl -sf 'atlas-group.cs.brown.edu/data/dummy/dict.txt' | sort > dict.txt + curl -sf 'atlas.cs.brown.edu/data/dummy/dict.txt' | sort > dict.txt if [ $? -ne 0 ]; then echo 'cannot find dict.txt -- please contact the developers of pash' exit 1 @@ -70,7 +70,7 @@ setup_dataset() { fi if [ ! -f ./all_cmds.txt ]; then - curl -sf 'atlas-group.cs.brown.edu/data/dummy/all_cmds.txt' > all_cmds.txt + curl -sf 'atlas.cs.brown.edu/data/dummy/all_cmds.txt' > all_cmds.txt if [ $? -ne 0 ]; then # This should be OK for tests, no need for abort ls /usr/bin/* > all_cmds.txt diff --git a/evaluation/intro/input/setup.sh b/evaluation/intro/input/setup.sh index c2eaa684d..7681d24fe 100755 --- a/evaluation/intro/input/setup.sh +++ b/evaluation/intro/input/setup.sh @@ -8,7 +8,7 @@ cd $(dirname $0) if [ ! -f ./100M.txt ]; then - curl -sf --connect-timeout 10 'atlas-group.cs.brown.edu/data/dummy/100M.txt' > 100M.txt + curl -sf --connect-timeout 10 'atlas.cs.brown.edu/data/dummy/100M.txt' > 100M.txt if [ $? -ne 0 ]; then # Pipe curl through tac (twice) in order to consume all the output from curl. # This way, curl can write the whole page and not emit an error code. @@ -23,7 +23,7 @@ if [ ! -f ./100M.txt ]; then fi if [ ! -f ./words ]; then - curl -sf --connect-timeout 10 'atlas-group.cs.brown.edu/data/dummy/words' > words + curl -sf --connect-timeout 10 'atlas.cs.brown.edu/data/dummy/words' > words if [ $? -ne 0 ]; then curl -sf 'https://zenodo.org/record/7650885/files/words' > words if [ $? -ne 0 ]; then diff --git a/evaluation/other/more-scripts/page-count.sh b/evaluation/other/more-scripts/page-count.sh index c4d89ecfd..08da185a0 100755 --- a/evaluation/other/more-scripts/page-count.sh +++ b/evaluation/other/more-scripts/page-count.sh @@ -5,7 +5,7 @@ # Require: libimage-exiftool-perl, bc # Data: -# atlas-group.cs.brown.edu/data/large.pdf +# atlas.cs.brown.edu/data/large.pdf # More data: # https://arxiv.org/help/bulk_data diff --git a/evaluation/other/more-scripts/spell.sh b/evaluation/other/more-scripts/spell.sh index 9fd5e7384..e11efa1c1 100755 --- a/evaluation/other/more-scripts/spell.sh +++ b/evaluation/other/more-scripts/spell.sh @@ -6,7 +6,7 @@ # TODO: `groff is an interesting "pure", whose wrapper only needs split input # TODO: files carefully. -# Data: atlas-group.cs.brown.edu/data/dummy/ronn.1 +# Data: atlas.cs.brown.edu/data/dummy/ronn.1 # dict depends on the system (and has to be sorted), so we assume it exists dict=./input/dict.txt diff --git a/evaluation/tests/input/setup.sh b/evaluation/tests/input/setup.sh index ccc6712fe..eefe878c7 100755 --- a/evaluation/tests/input/setup.sh +++ b/evaluation/tests/input/setup.sh @@ -16,7 +16,7 @@ esac [ "$1" = "-c" ] && rm-files 1M.txt all_cmds.txt words sorted_words 10M.txt if [ ! -f ./1M.txt ]; then - curl -sf --connect-timeout 10 'atlas-group.cs.brown.edu/data/dummy/1M.txt' > 1M.txt + curl -sf --connect-timeout 10 'atlas.cs.brown.edu/data/dummy/1M.txt' > 1M.txt if [ $? -ne 0 ]; then curl -f 'https://zenodo.org/record/7650885/files/1M.txt' > 1M.txt if [ $? -ne 0 ]; then @@ -29,7 +29,7 @@ fi if [ ! -f ./all_cmds.txt ]; then if [ "$(hostname)" = "deathstar" ]; then - curl -sf --connect-timeout 10 'atlas-group.cs.brown.edu/data/dummy/all_cmds.txt' > all_cmds.txt + curl -sf --connect-timeout 10 'atlas.cs.brown.edu/data/dummy/all_cmds.txt' > all_cmds.txt if [ $? -ne 0 ]; then curl -f 'https://zenodo.org/record/7650885/files/all_cmds.txt' > all_cmds.txt || eexit "all_cmds not found" fi @@ -40,7 +40,7 @@ if [ ! -f ./all_cmds.txt ]; then fi if [ ! -f ./words ]; then - curl -sf --connect-timeout 10 'atlas-group.cs.brown.edu/data/dummy/words' > words + curl -sf --connect-timeout 10 'atlas.cs.brown.edu/data/dummy/words' > words if [ $? -ne 0 ]; then curl -f 'https://zenodo.org/record/7650885/files/words' > words if [ $? -ne 0 ]; then diff --git a/evaluation/tests/sed-test.sh b/evaluation/tests/sed-test.sh index 38d1cc855..c82972bb6 100644 --- a/evaluation/tests/sed-test.sh +++ b/evaluation/tests/sed-test.sh @@ -1,9 +1,9 @@ cat $PASH_TOP/evaluation/tests/input/1M.txt | sed 's;^d;da;' | - sed 's;^;atlas-group.cs.brown.edu/data/noaa/;' | + sed 's;^;atlas.cs.brown.edu/data/noaa/;' | sed 's;$;/;' | sed 's;^\(.*\)\(20[0-9][0-9]\).gz;\2/\1\2\.gz;' | - sed 's;^;atlas-group.cs.brown.edu/data/noaa/;' | + sed 's;^;atlas.cs.brown.edu/data/noaa/;' | sed "s#^#$WIKI#" | sed s/\$/'0s'/ | sed 1d |