forked from piergiaj/tgm-icml19
-
Notifications
You must be signed in to change notification settings - Fork 0
/
multithumos_i3d_per_video.py
106 lines (81 loc) · 3.02 KB
/
multithumos_i3d_per_video.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import torch
import torch.utils.data as data_utl
from torch.utils.data.dataloader import default_collate
import numpy as np
import json
import csv
import os
import os.path
def video_to_tensor(pic):
    """Turn a video held in a numpy array into a torch tensor.

    The axes are permuted so that the last input axis becomes the first:
    an input laid out as (T x H x W x C) comes back as (C x T x H x W).
    The element type of the result follows the element type of ``pic``.

    Args:
        pic (numpy.ndarray): 4-D video array to convert.

    Returns:
        Tensor: The permuted video, built with ``torch.from_numpy`` on the
        transposed array.
    """
    channels_first = pic.transpose(3, 0, 1, 2)
    return torch.from_numpy(channels_first)
def make_dataset(split_file, split, root, num_classes=65):
    """Build the list of per-video frame labels for one split.

    The split file is a JSON object keyed by video id. Each entry is read
    for its ``'subset'``, ``'duration'`` and ``'actions'`` fields, where
    every action is indexed as ``[class_id, start, end]``. Videos that
    belong to another subset, or that have no ``<vid>.npy`` file under
    ``root``, are skipped.

    Args:
        split_file (str): Path to the JSON annotation file.
        split (str): Value of ``'subset'`` to keep.
        root (str): Directory holding one ``<vid>.npy`` feature file per video.
        num_classes (int): Width of the multi-hot label rows.

    Returns:
        list: One ``(vid, label, duration)`` tuple per kept video, where
        ``label`` is a float32 array of shape ``(num_feat, num_classes)``
        and ``num_feat`` is the first dimension of the stored features.

    Raises:
        ZeroDivisionError: If a kept video has a ``'duration'`` of zero.
    """
    with open(split_file, 'r') as f:
        data = json.load(f)

    dataset = []
    for vid, info in data.items():
        if info['subset'] != split:
            continue
        feat_path = os.path.join(root, vid + '.npy')
        if not os.path.exists(feat_path):
            continue

        # Only the number of feature steps is needed here; the features
        # themselves are not kept.
        num_feat = np.load(feat_path).shape[0]
        duration = info['duration']
        label = np.zeros((num_feat, num_classes), np.float32)

        # Features per unit of time, used to map a feature index to a time.
        fps = num_feat / duration
        # Timestamp of every feature step, computed once per video instead
        # of once per (annotation, step) pair.
        times = np.arange(num_feat) / fps

        for ann in info['actions']:
            # A step is positive only when its timestamp lies strictly
            # inside the annotated interval.
            active = (times > ann[1]) & (times < ann[2])
            # binary classification, class index -1 to make 0 indexed
            label[active, ann[0] - 1] = 1

        dataset.append((vid, label, duration))
    return dataset
# Example usage: make_dataset('multithumos.json', 'training', '/ssd2/thumos/val_i3d_rgb')
class MultiThumos(data_utl.Dataset):
    """Dataset that serves whole-video feature arrays with per-step labels.

    The list of videos and their label arrays is produced once, at
    construction time, by ``make_dataset``. Feature files are read from
    ``root`` on first access and then kept in an in-memory cache keyed by
    video id.
    """

    def __init__(self, split_file, split, root, batch_size):
        """Index the requested split and set up an empty feature cache.

        Args:
            split_file: Annotation file handed to ``make_dataset``.
            split: Subset name handed to ``make_dataset``.
            root: Directory containing the ``<vid>.npy`` feature files.
            batch_size: Stored on the instance; not used by this class itself.
        """
        self.split_file = split_file
        self.root = root
        self.batch_size = batch_size
        # vid -> float32 feature array, filled lazily by __getitem__.
        self.in_mem = {}
        self.data = make_dataset(split_file, split, root)

    def __getitem__(self, index):
        """Return the features, labels and metadata of one video.

        Args:
            index (int): Position in the list built by ``make_dataset``.

        Returns:
            tuple: ``(feat, label, [vid, duration])`` where ``feat`` is the
            float32 feature array loaded from ``<root>/<vid>.npy`` and
            ``label`` is the label array stored for that video.
        """
        record = self.data[index]
        vid = record[0]
        try:
            feat = self.in_mem[vid]
        except KeyError:
            # First request for this video: read it from disk and cache it.
            feat = np.load(os.path.join(self.root, vid + '.npy')).astype(np.float32)
            self.in_mem[vid] = feat
        return feat, record[1], [vid, record[2]]

    def __len__(self):
        """Return the number of indexed videos."""
        return len(self.data)
def mt_collate_fn(batch):
    """Zero-pad every sample to the longest one in the batch and collate.

    Each sample is indexed as ``(features, labels, meta)``. Features are
    indexed along four axes and padded along the first one; labels are
    padded along their first axis to the same length. A float32 mask is
    built per sample with ones over the original length and zeros over
    the padding.

    Args:
        batch (list): Samples as returned by the dataset's ``__getitem__``.

    Returns:
        The result of ``default_collate`` over
        ``[features_tensor, mask, labels, meta]`` entries, where the
        feature tensor has gone through ``video_to_tensor``.
    """
    longest = max((sample[0].shape[0] for sample in batch), default=0)

    padded = []
    for sample in batch:
        feat, label, meta = sample[0], sample[1], sample[2]
        length = feat.shape[0]

        mask = np.zeros((longest), np.float32)
        mask[:length] = 1

        feat_pad = np.zeros(
            (longest, feat.shape[1], feat.shape[2], feat.shape[3]), np.float32
        )
        feat_pad[:length] = feat

        label_pad = np.zeros((longest, label.shape[1]), np.float32)
        label_pad[:length, :] = label

        padded.append([
            video_to_tensor(feat_pad),
            torch.from_numpy(mask),
            torch.from_numpy(label_pad),
            meta,
        ])
    return default_collate(padded)