-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvqa_manual.py
116 lines (92 loc) · 3.46 KB
/
vqa_manual.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import json
import itertools
import sys
# Replace 'your_file.json' with the path to your actual JSON file
file_path = '/scratch/irving.b/vqa/annotations/v2_OpenEnded_mscoco_train2014_questions.json'
other_file_path = '/scratch/irving.b/vqa/annotations/v2_mscoco_train2014_annotations.json'
# Open the JSON file and load its content into a dictionary
with open(file_path, 'r') as file:
data = json.load(file)
with open(other_file_path, 'r') as file:
data_2 = json.load(file)
# how to work with this data
from typing import List, Dict, Any
def get_score(count: int) -> float:
"""
Calculate a score based on the count.
Parameters:
- count (int): The count of something (e.g., answers).
Returns:
- float: The calculated score, capped at 1.0.
"""
return min(1.0, count / 3)
def process_annotations(annotations: List[Dict[str, Any]], config: Any) -> None:
"""
Process a list of annotations, updating each with labels and scores.
Parameters:
- annotations (List[Dict[str, Any]]): A list of annotation dictionaries.
- config (Any): Configuration object that contains mapping of labels to IDs.
Note: This function updates the annotations list in place.
"""
for annotation in annotations:
answers = annotation['answers']
answer_count = {}
# Count occurrences of each unique answer
for answer in answers:
answer_ = answer["answer"]
answer_count[answer_] = answer_count.get(answer_, 0) + 1
labels = []
scores = []
# Calculate scores for answers that are mapped to labels
for answer in answer_count:
if answer not in list(config.label2id.keys()):
continue
labels.append(config.label2id[answer])
score = get_score(answer_count[answer])
scores.append(score)
# Update the annotation with calculated labels and scores
annotation['labels'] = labels
annotation['scores'] = scores
annotations = data_2['annotations'][0:2]
print(annotations)
# hic sunc leones
labels = [[item['answer_id'] for item in annotation['answers']] for annotation in annotations]
unique_labels = [list(set(unique)) for unique in labels]
print(unique_labels)
# we want to make a dictionary mapping of labels2id
# then id2label
answers = [[item['answer'] for item in annotation['answers']] for annotation in annotations]
unique_answers = [list(set(unique)) for unique in answers]
print(unique_answers)
for answer in unique_answers:
print({label:idx for idx, label in enumerate(answer)})
for idx, answer in enumerate(answer):
print(idx)
print(answer)
# now, how to make the two dictionaries:
# first, id2label (give the dictionary an id, then map it to a label)
# a dictionary for each entry, correct? (correct, unfortunately)
# doing in this in batches would be more effective, no?
# naive implementation
# some dynamic dispatch
# oh my goodness bro hasn't used his brain in years
id2_labels_list = []
index = 0
for unique in unique_labels:
id2label = {}
for label in unique:
id2label[label] = annotations[index]['answers'][label-1]['answer']
id2_labels_list.append(id2label)
index += 1
# how to create id2labels
#print(id2_labels_list)
# now: labels to ids?
#labels_to_ids = []
#index = 0
#for unique in unique_answers:
# id2label = {}
# then, we are going to create a
# for answer in unique:
sys.exit()
process_annotations(annotations)
print(annotations)