# experiments.conf

# Word embeddings.
# Embedding source referenced as ${glove_300d} by leemd.context_embeddings.
glove_300d {
  path = glove.840B.300d.txt
  # NOTE(review): presumably the vector dimensionality of the file above -- confirm against the loader.
  size = 300
}
# Embedding source referenced as ${glove_300d_2w} by leemd.head_embeddings.
glove_300d_2w {
  path = glove_50_300_2.txt
  # NOTE(review): presumably the vector dimensionality of the file above -- confirm against the loader.
  size = 300
}


# Shared defaults. biaffinemd, leemd and bertmd each start from ${base}
# and add or override keys on top of it.
base {
  max_training_sentences = 50

  # ffnn_* settings.
  ffnn_depth = 2
  ffnn_size = 150

  # Learning hyperparameters.
  optimizer = adam
  # bertmd replaces this learning rate with its own value.
  learning_rate = 0.001
  decay_rate = 0.999
  decay_frequency = 100
  max_gradient_norm = 5.0
  dropout_rate = 0.2
  lexical_dropout_rate = 0.5
  lstm_dropout_rate = 0.4

  # Input files.
  train_path = train.english.jsonlines
  eval_path = dev.english.jsonlines
  test_path = test.english.jsonlines
  # lm_path and test_lm_path currently name the same cache file.
  lm_path = elmo_cache.hdf5
  test_lm_path = elmo_cache.hdf5

  # Run length, reporting cadence and log location.
  max_step = 40000
  eval_frequency = 500
  report_frequency = 100
  log_root = logs
}


# Main configuration.
# biaffinemd: all keys of base, merged with the settings below.
biaffinemd = ${base}{
  # One of: lee, biaffine or bert.
  model_type = biaffine
  top_span_ratio = 0.4
  # high_recall or high_f1.
  mention_selection_method = high_recall
  # For high_f1.
  mention_selection_threshold = 0.5

  contextualization_layers = 3
  contextualization_size = 200
  lm_layers = 3
  lm_size = 1024
}

# leemd: all keys of base, merged with the settings below.
leemd = ${base}{
  # One of: lee, biaffine or bert.
  model_type = lee
  top_span_ratio = 0.4
  # high_recall or high_f1.
  mention_selection_method = high_recall
  # For high_f1.
  mention_selection_threshold = 0.5

  # Character-level settings.
  char_vocab_path = "char_vocab.english.txt"
  char_embedding_size = 8
  filter_widths = [3, 4, 5]
  filter_size = 50

  # Both embedding entries are substitutions of the top-level glove_* objects.
  context_embeddings = ${glove_300d}
  head_embeddings = ${glove_300d_2w}

  contextualization_layers = 3
  contextualization_size = 200
  lm_layers = 3
  lm_size = 1024
  feature_size = 20
  max_span_width = 30
}

# bertmd: all keys of base, merged with the settings below.
bertmd = ${base}{
  # One of: lee, biaffine or bert.
  model_type = bert
  top_span_ratio = 0.4
  # high_recall or high_f1.
  mention_selection_method = high_recall
  # For high_f1.
  mention_selection_threshold = 0.5

  bert_size = 768
  max_bert_sent_len = 256
  # Keep the URL quoted: an unquoted "//" would start a HOCON comment.
  bert_url = "https://tfhub.dev/google/bert_cased_L-12_H-768_A-12/1"

  max_span_width = 30

  # Overrides the learning_rate of 0.001 inherited from base.
  learning_rate = 0.00002
}