-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrerank.sh
executable file
·123 lines (112 loc) · 4.46 KB
/
rerank.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/bin/bash
# Give the first eight positional arguments descriptive names.

# $1: dataset identifier.
# $2: label used in the results directory and run-file name.
dataset_name="${1}"
file_append="${2}"

# $3 / $4: chunking controls, used if chunked processing is desired.
chunk_idx="${3}"
chunk_queries="${4}"

# $5 / $7: the document and query expansions ("combs").
# $6 / $8: how each expansion is combined with the original text
#          ("append", "replace", etc.).
doc_comb="${5}"
doc_comb_type="${6}"
query_comb="${7}"
query_comb_type="${8}"
# Map the model alias given as $9 to the model identifier stored in
# chosen_model. Unknown aliases abort the script with exit status 1.
# Diagnostics are written to stderr so they do not mix with normal output.
case "${9}" in
  inpars)
    # The InPars checkpoint name ends with the dataset name, with every '-'
    # replaced by '_'.
    chosen_model="zeta-alpha-ai/monot5-3b-inpars-v2-${dataset_name//-/_}"
    ;;
  bm25)
    chosen_model="bm25"
    ;;
  bm25_full)
    # Selects the `bm25_full` check later in this script, which was otherwise
    # only reachable through the "custom" alias.
    chosen_model="bm25_full"
    ;;
  monot5)
    chosen_model="castorini/monot5-3b-msmarco-10k"
    ;;
  monot5-small)
    chosen_model="castorini/monot5-small-msmarco-10k"
    ;;
  monot5-base)
    chosen_model="castorini/monot5-base-msmarco-10k"
    ;;
  monot5-large)
    chosen_model="castorini/monot5-large-msmarco-10k"
    ;;
  tart)
    chosen_model="facebook/tart-full-flan-t5-xl"
    ;;
  flan)
    chosen_model="google/flan-t5-xl"
    ;;
  wizard)
    chosen_model="conceptofmind/Flan-Open-Llama-7b"
    ;;
  contriever)
    chosen_model="facebook/contriever"
    ;;
  contriever_msmarco)
    chosen_model="facebook/contriever-msmarco"
    ;;
  monobert)
    chosen_model="castorini/monobert-large-msmarco-finetune-only"
    ;;
  dpr)
    chosen_model="facebook/dpr-question_encoder-single-nq-base"
    ;;
  colbertv2)
    chosen_model="colbertv2.0"
    ;;
  spladev2)
    chosen_model="naver/splade_v2_distil"
    ;;
  ms-marco-MiniLM-L-4-v2 | ms-marco-MiniLM-L-2-v2 | ms-marco-MiniLM-L-12-v2)
    # These aliases are the model name without its "cross-encoder/" prefix.
    chosen_model="cross-encoder/${9}"
    ;;
  e5-large-v2)
    chosen_model="intfloat/e5-large-v2"
    ;;
  e5-base-v2)
    chosen_model="intfloat/e5-base-v2"
    ;;
  e5-small)
    chosen_model="intfloat/e5-small-v2"
    ;;
  gte-large)
    chosen_model="thenlper/gte-large"
    ;;
  gte-small)
    chosen_model="thenlper/gte-small"
    ;;
  bge-large-en)
    chosen_model="BAAI/bge-large-en"
    ;;
  bge-small)
    chosen_model="BAAI/bge-small-en"
    ;;
  all-mpnet-base-v2)
    chosen_model="sentence-transformers/all-mpnet-base-v2"
    ;;
  llama2)
    chosen_model="orionweller/llama2-reranker-msmarco-7b"
    ;;
  llama2-13b)
    chosen_model="orionweller/llama2-reranker-msmarco-13b"
    ;;
  custom)
    # "custom" takes the model identifier verbatim from argument 13.
    if [[ -z "${13}" ]]; then
      echo "Error: model 'custom' requires the model name as argument 13." >&2
      exit 1
    fi
    chosen_model="${13}"
    ;;
  *)
    echo "Error: ${9} is not a valid model name. Please see the options in 'rerank.sh'." >&2
    exit 1
    ;;
esac
# Arguments 10 and 11 are forwarded later as --max_num_queries and
# --max_num_docs.
max_queries=${10}
max_docs=${11}
echo "Reranking with $chosen_model..."

# Replace every '/' in the dataset name with '--' so the name can be used as
# a single path component. Parameter expansion avoids the word-splitting and
# globbing that an unquoted `echo $1 | sed` would apply to the name.
dataset_name_slash_to_dash=${1//\//--}

# Create results/<dataset>/<file_append>/ if it does not exist yet.
# NOTE: with -p, the -m 777 mode is applied only to the last directory of
# each call; any missing parent directories get the default (umask) mode.
mkdir -p -m 777 "results/$dataset_name_slash_to_dash"
mkdir -p -m 777 "results/$dataset_name_slash_to_dash/$file_append/"
# When the chosen model is "bm25_full", hand the whole run to
# make_bm25_run.sh instead of the Python reranker, then stop the script:
# exit 0 if that script succeeded, exit 1 otherwise.
if [[ "$chosen_model" == "bm25_full" ]]; then
  echo "Running bm25 full"
  SAVE_PATH="results/$dataset_name_slash_to_dash/$file_append/$dataset_name_slash_to_dash-$file_append-run.txt"
  echo "bash make_bm25_run.sh $SAVE_PATH $dataset_name doc_id title,text query_id text $query_comb $doc_comb"
  # Each expansion is quoted so that an empty or space-containing value
  # stays in its own argument position instead of shifting later arguments.
  bash make_bm25_run.sh "$SAVE_PATH" "$dataset_name" doc_id title,text query_id text "$query_comb" "$doc_comb"
  RESULT=$?
  if [[ "$RESULT" -ne 0 ]]; then
    echo "Error: make_bm25_run.sh exited with status $RESULT" >&2
    exit 1
  fi
  echo "success"
  exit 0
fi
# Run the Python reranker with the settings gathered above. The run file is
# written under results/<dataset>/<file_append>/.
# The arguments are collected in an array and every expansion is quoted, so
# values containing spaces or glob characters reach the program unchanged and
# an empty value cannot swallow the option that follows it.
rerank_args=(
  --model "$chosen_model"
  --dataset "$dataset_name"
  --output_run "results/$dataset_name_slash_to_dash/$file_append/$dataset_name_slash_to_dash-$file_append-run.txt"
  --batch_size 16
  --chunk_idx "$chunk_idx"
  --chunk_queries "$chunk_queries"
  --fp16
  --corpus "$doc_comb"
  --corpus_combination "$doc_comb_type"
  --queries "$query_comb"
  --queries_combination "$query_comb_type"
  --max_num_queries "$max_queries"
  --max_num_docs "$max_docs"
  --torch_compile
)
python -m expansions.rerank "${rerank_args[@]}"
# example usage (the 9th argument must be one of the model aliases checked above, not a full model path):
# bash rerank.sh "scifact_refute" "testing" 0 1 "none" "none" "none" "none" "contriever_msmarco" 10 100