-
Notifications
You must be signed in to change notification settings - Fork 0
/
prepare_eval_cs_verbs.py
executable file
·50 lines (43 loc) · 1.73 KB
/
prepare_eval_cs_verbs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/env python
#
# Author: (c) 2016 Vincent Kriz <[email protected]>
#
import sys
import logging
import argparse
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy.stats.stats import spearmanr
# Format shared by every log message this script emits.
LOG_FORMAT = '%(asctime)-15s [%(levelname)7s] %(funcName)s - %(message)s'


def _pair_key(first, second):
    """Return the 'first,second' key used to match template and original rows."""
    return '%s,%s' % (first, second)


def _load_template(path):
    """Read the fold template and return its word-pair keys in file order.

    Each template line is comma-separated; only the first two fields are
    used. Blank lines are skipped silently and lines with fewer than two
    fields are skipped with a warning instead of aborting the whole run.
    """
    pairs = []
    with open(path, 'r') as ftemplate:
        for line_number, line in enumerate(ftemplate, 1):
            stripped = line.rstrip()
            fields = stripped.split(',')
            if len(fields) < 2:
                if stripped.strip():
                    logging.warning('Skipping malformed template line %d: %r',
                                    line_number, stripped)
                continue
            pairs.append(_pair_key(fields[0], fields[1]))
    return pairs


def _index_original(path):
    """Index the tab-separated original file by its first two columns.

    Returns a dict mapping 'word1,word2' to the list of split rows that
    carry that pair, in file order. Building the index once replaces
    re-splitting every original line for every template pair.
    """
    index = {}
    with open(path, 'r') as foriginal:
        for line in foriginal:
            # rstrip() without arguments is kept on purpose: it also drops
            # trailing empty columns, so rows lacking the last columns are
            # reported as problems below rather than written half-empty.
            fields = line.rstrip().split('\t')
            if len(fields) < 2:
                # Blank or single-column lines cannot match any pair.
                continue
            index.setdefault(_pair_key(fields[0], fields[1]), []).append(fields)
    return index


def _write_final(template, index, path):
    """Write one 'col10,col11,similarity' line per matching original row.

    Output follows template order; if a pair occurs several times in the
    original file, every occurrence is written. Columns 10 and 11 are
    presumably the translated word pair (verify against the input file).
    The similarity comes from column 3 with a decimal comma turned into a
    decimal point. Returns the number of lines written.
    """
    written = 0
    with open(path, 'w') as ffinal:
        for pair_to_add in template:
            rows = index.get(pair_to_add)
            if not rows:
                logging.warning('Pair %s was not found in the original file',
                                pair_to_add)
                continue
            for fields in rows:
                try:
                    similarity = fields[3].replace(',', '.')
                    record = '%s,%s,%s\n' % (fields[10], fields[11], similarity)
                except IndexError:
                    # Only a row with too few columns is a per-pair problem;
                    # real I/O errors from write() must propagate.
                    logging.error('A problem occurred for pair %s', pair_to_add)
                    continue
                ffinal.write(record)
                written += 1
    return written


def main(argv=None):
    """Command-line entry point.

    argv is the list of command-line arguments without the program name;
    when None, argparse falls back to sys.argv[1:].
    """
    # Logging.
    logging.basicConfig(format=LOG_FORMAT, level=logging.DEBUG)

    # Parse command line arguments.
    parser = argparse.ArgumentParser()
    parser.description = 'Split translated SimLex-999 to a cross-validation according to given template.'
    parser.add_argument('--template', required=True, help='a file with fold template')
    parser.add_argument('--original', required=True, help='an original SimLex-999 file')
    parser.add_argument('--final', required=True, help='a file with final output')
    args = parser.parse_args(argv)

    # Load the fold template.
    logging.info('Loading template...')
    template = _load_template(args.template)

    # Load the original data.
    logging.info('Loading original...')
    original = _index_original(args.original)

    # Process the final output
    logging.info('Creating %s...', args.final)
    written = _write_final(template, original, args.final)
    logging.info('Wrote %d line(s) to %s', written, args.final)


if __name__ == '__main__':
    main()