forked from antoine77340/Youtube-8M-WILLOW
-
Notifications
You must be signed in to change notification settings - Fork 2
/
utils.py
162 lines (136 loc) · 5.75 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains a collection of util functions for training and evaluating.
"""
import numpy
import tensorflow as tf
from tensorflow import logging
def Dequantize(feat_vector, max_quantized_value=2, min_quantized_value=-2):
"""Dequantize the feature from the byte format to the float format.
Args:
feat_vector: the input 1-d vector.
max_quantized_value: the maximum of the quantized value.
min_quantized_value: the minimum of the quantized value.
Returns:
A float vector which has the same shape as feat_vector.
"""
assert max_quantized_value > min_quantized_value
quantized_range = max_quantized_value - min_quantized_value
scalar = quantized_range / 255.0
bias = (quantized_range / 512.0) + min_quantized_value
return feat_vector * scalar + bias
def MakeSummary(name, value):
"""Creates a tf.Summary proto with the given name and value."""
summary = tf.Summary()
val = summary.value.add()
val.tag = str(name)
val.simple_value = float(value)
return summary
def AddGlobalStepSummary(summary_writer,
global_step_val,
global_step_info_dict,
summary_scope="Eval"):
"""Add the global_step summary to the Tensorboard.
Args:
summary_writer: Tensorflow summary_writer.
global_step_val: a int value of the global step.
global_step_info_dict: a dictionary of the evaluation metrics calculated for
a mini-batch.
summary_scope: Train or Eval.
Returns:
A string of this global_step summary
"""
this_hit_at_one = global_step_info_dict["hit_at_one"]
this_perr = global_step_info_dict["perr"]
this_loss = global_step_info_dict["loss"]
examples_per_second = global_step_info_dict.get("examples_per_second", -1)
summary_writer.add_summary(
MakeSummary("GlobalStep/" + summary_scope + "_Hit@1", this_hit_at_one),
global_step_val)
summary_writer.add_summary(
MakeSummary("GlobalStep/" + summary_scope + "_Perr", this_perr),
global_step_val)
summary_writer.add_summary(
MakeSummary("GlobalStep/" + summary_scope + "_Loss", this_loss),
global_step_val)
if examples_per_second != -1:
summary_writer.add_summary(
MakeSummary("GlobalStep/" + summary_scope + "_Example_Second",
examples_per_second), global_step_val)
summary_writer.flush()
info = ("global_step {0} | Batch Hit@1: {1:.3f} | Batch PERR: {2:.3f} | Batch Loss: {3:.3f} "
"| Examples_per_sec: {4:.3f}").format(
global_step_val, this_hit_at_one, this_perr, this_loss,
examples_per_second)
return info
def AddEpochSummary(summary_writer,
global_step_val,
epoch_info_dict,
summary_scope="Eval"):
"""Add the epoch summary to the Tensorboard.
Args:
summary_writer: Tensorflow summary_writer.
global_step_val: a int value of the global step.
epoch_info_dict: a dictionary of the evaluation metrics calculated for the
whole epoch.
summary_scope: Train or Eval.
Returns:
A string of this global_step summary
"""
epoch_id = epoch_info_dict["epoch_id"]
avg_hit_at_one = epoch_info_dict["avg_hit_at_one"]
avg_perr = epoch_info_dict["avg_perr"]
avg_loss = epoch_info_dict["avg_loss"]
aps = epoch_info_dict["aps"]
gap = epoch_info_dict["gap"]
mean_ap = numpy.mean(aps)
summary_writer.add_summary(
MakeSummary("Epoch/" + summary_scope + "_Avg_Hit@1", avg_hit_at_one),
global_step_val)
summary_writer.add_summary(
MakeSummary("Epoch/" + summary_scope + "_Avg_Perr", avg_perr),
global_step_val)
summary_writer.add_summary(
MakeSummary("Epoch/" + summary_scope + "_Avg_Loss", avg_loss),
global_step_val)
summary_writer.add_summary(
MakeSummary("Epoch/" + summary_scope + "_MAP", mean_ap),
global_step_val)
summary_writer.add_summary(
MakeSummary("Epoch/" + summary_scope + "_GAP", gap),
global_step_val)
summary_writer.flush()
info = ("epoch/eval number {0} | Avg_Hit@1: {1:.3f} | Avg_PERR: {2:.3f} "
"| MAP: {3:.3f} | GAP: {4:.4f} | Avg_Loss: {5:3f}").format(
epoch_id, avg_hit_at_one, avg_perr, mean_ap, gap, avg_loss)
return info
def GetListOfFeatureNamesAndSizes(feature_names, feature_sizes):
"""Extract the list of feature names and the dimensionality of each feature
from string of comma separated values.
Args:
feature_names: string containing comma separated list of feature names
feature_sizes: string containing comma separated list of feature sizes
Returns:
List of the feature names and list of the dimensionality of each feature.
Elements in the first/second list are strings/integers.
"""
list_of_feature_names = [
feature_names.strip() for feature_names in feature_names.split(',')]
list_of_feature_sizes = [
int(feature_sizes) for feature_sizes in feature_sizes.split(',')]
if len(list_of_feature_names) != len(list_of_feature_sizes):
logging.error("length of the feature names (=" +
str(len(list_of_feature_names)) + ") != length of feature "
"sizes (=" + str(len(list_of_feature_sizes)) + ")")
return list_of_feature_names, list_of_feature_sizes