forked from pcyin/NL2code
-
Notifications
You must be signed in to change notification settings - Fork 0
/
train.sh
executable file
·65 lines (59 loc) · 2.19 KB
/
train.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Usage: train.sh [hs|<other>] [gColab]
#   $1  "hs" trains on the hs dataset; any other value selects django.
#   $2  "gColab" shortens the run (fewer epochs); any other value keeps the
#       full epoch budget.

# Directory passed to code_gen.py as -output_dir; models and decode results
# are read from / written to it later in this script.
output="runs"
# Device name substituted into THEANO_FLAGS for the train and decode steps.
device="cuda0"
## Originally the batch size was 10 and epoch size was 200 for hs
## Originally the batch size was 10, epoch size was 50, valid_per_batch and save_per_batch were 4000 each for django
# The batch size is the same for both run modes; only the epoch limit differs.
batch_size=10
# Use the POSIX "=" operator: "==" inside [ ] is not defined by POSIX and
# fails under shells such as dash, which matters because this script carries
# no shebang. ${2:-} keeps the test well-defined when $2 is not supplied.
if [ "${2:-}" = "gColab" ]; then
  echo "Google Colab running"
  max_epoch_size=10
else
  max_epoch_size=200
fi
# Choose the dataset-specific settings from the first argument.
# "hs" selects the hs dataset; any other value (including no argument)
# selects django. Sets: dataset (file name under data/), commandline (extra
# options later passed to code_gen.py) and datatype (value for -data_type).
# A case statement is used instead of [ "$1" == ... ] because "==" inside
# [ ] is not POSIX and breaks under sh implementations such as dash.
case "${1:-}" in
  hs)
    # hs dataset
    # batch_size and max_epoch_size are expected to be set before this point.
    echo "training hs dataset"
    echo "batch size : ${batch_size}"
    echo "max epoch size : ${max_epoch_size}"
    dataset="hs.freq3.pre_suf.unary_closure.bin"
    commandline="-batch_size ${batch_size} -max_epoch ${max_epoch_size} -valid_per_batch 280 -save_per_batch 280 -decode_max_time_step 350 -optimizer adadelta -rule_embed_dim 128 -node_embed_dim 64 -valid_metric bleu"
    datatype="hs"
    ;;
  *)
    # django dataset
    # NOTE(review): batch_size and max_epoch_size are reassigned here, so any
    # values chosen earlier in the script do not apply to django runs --
    # confirm this is intended.
    batch_size=10
    max_epoch_size=2
    valid_per_batch_val=500
    save_per_batch_val=500
    echo "training django dataset"
    echo "batch size : ${batch_size}"
    echo "max epoch size : ${max_epoch_size}"
    echo "valid_per_batch_val : ${valid_per_batch_val}"
    echo "save_per_batch_val : ${save_per_batch_val}"
    dataset="django.cleaned.dataset.freq5.par_info.refact.space_only.bin"
    commandline="-batch_size ${batch_size} -max_epoch ${max_epoch_size} -valid_per_batch ${valid_per_batch_val} -save_per_batch ${save_per_batch_val} -decode_max_time_step 100 -optimizer adam -rule_embed_dim 128 -node_embed_dim 64 -valid_metric bleu"
    datatype="django"
    ;;
esac
# train the model
# THEANO_FLAGS is set in the environment of this single command only.
# Path-like expansions are quoted so they are passed as one argument each.
# ${commandline} is intentionally left unquoted: it holds several
# space-separated options and must be word-split into separate arguments.
# shellcheck disable=SC2086
THEANO_FLAGS="mode=FAST_RUN,device=${device},floatX=float32" python -u code_gen.py \
  -data_type "${datatype}" \
  -data "data/${dataset}" \
  -output_dir "${output}" \
  ${commandline} \
  train
# decode testing set, and evaluate the model which achieves the best bleu and accuracy, resp.
# For each saved model: run the decode step, writing its results next to the
# model file, then run the evaluate step on that same results file.
for model in "model.best_bleu.npz" "model.best_acc.npz"; do
  # Single definition of the results path shared by decode (-saveto) and
  # evaluate (-input), so the two steps cannot drift apart.
  decode_results="${output}/${model}.decode_results.test.bin"

  # ${commandline} is intentionally unquoted so that it splits into
  # individual options; every other expansion is quoted.
  # shellcheck disable=SC2086
  THEANO_FLAGS="mode=FAST_RUN,device=${device},floatX=float32" python code_gen.py \
    -data_type "${datatype}" \
    -data "data/${dataset}" \
    -output_dir "${output}" \
    -model "${output}/${model}" \
    ${commandline} \
    decode \
    -saveto "${decode_results}"

  python code_gen.py \
    -data_type "${datatype}" \
    -data "data/${dataset}" \
    -output_dir "${output}" \
    evaluate \
    -input "${decode_results}"
done