This repository has been archived by the owner on Jan 3, 2023. It is now read-only.

Mingshan/Adding resnet50 validation script #478

Open · wants to merge 17 commits into base: master
Changes from 1 commit
89 changes: 89 additions & 0 deletions test/validate_resnet50/datasets_make_deterministic.patch
@@ -0,0 +1,89 @@
diff --git a/scripts/tf_cnn_benchmarks/benchmark_cnn.py b/scripts/tf_cnn_benchmarks/benchmark_cnn.py
index 09b118e..4cf9a12 100644
--- a/scripts/tf_cnn_benchmarks/benchmark_cnn.py
+++ b/scripts/tf_cnn_benchmarks/benchmark_cnn.py
@@ -34,6 +34,7 @@ import numpy as np
import six
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
+import ngraph_bridge

from google.protobuf import text_format

@@ -2479,6 +2480,7 @@ class BenchmarkCNN(object):
fetches = self._build_fetches(global_step, all_logits, losses, device_grads,
enqueue_ops, update_ops, all_accuracy_ops,
phase_train)
+
if global_input_producer_op:
global_input_producer_op = tf.group(*global_input_producer_op)
else:
diff --git a/scripts/tf_cnn_benchmarks/data_utils.py b/scripts/tf_cnn_benchmarks/data_utils.py
index 0376d0b..992ee75 100644
--- a/scripts/tf_cnn_benchmarks/data_utils.py
+++ b/scripts/tf_cnn_benchmarks/data_utils.py
@@ -112,7 +112,10 @@ def create_dataset(batch_size,
if not file_names:
raise ValueError('Found no files in --data_dir matching: {}'
.format(glob_pattern))
- ds = tf.data.TFRecordDataset.list_files(file_names)
+
+ # ds = tf.data.TFRecordDataset.list_files(file_names)
+ ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False, seed=10)
+
ds = ds.apply(
interleave_ops.parallel_interleave(
tf.data.TFRecordDataset, cycle_length=10))
@@ -122,8 +125,9 @@ def create_dataset(batch_size,
counter = counter.repeat()
ds = tf.data.Dataset.zip((ds, counter))
ds = ds.prefetch(buffer_size=batch_size)
- if train:
- ds = ds.shuffle(buffer_size=10000)
+ # Make dataset loader deterministic
+ # if train:
+ # ds = ds.shuffle(buffer_size=10000)
ds = ds.repeat()
ds = ds.apply(
batching.map_and_batch(
diff --git a/scripts/tf_cnn_benchmarks/preprocessing.py b/scripts/tf_cnn_benchmarks/preprocessing.py
index 6a270b0..4e84a1a 100644
--- a/scripts/tf_cnn_benchmarks/preprocessing.py
+++ b/scripts/tf_cnn_benchmarks/preprocessing.py
@@ -335,9 +335,11 @@ def train_image(image_buffer,
else:
image = tf.image.decode_jpeg(image_buffer, channels=3,
dct_method='INTEGER_FAST')
- image = tf.slice(image, bbox_begin, bbox_size)

- distorted_image = tf.image.random_flip_left_right(image)
+ #image = tf.slice(image, bbox_begin, bbox_size)
+
+ #distorted_image = tf.image.random_flip_left_right(image)
+ distorted_image = image

# This resizing operation may distort the images because the aspect
# ratio is not respected.
@@ -361,7 +363,7 @@ def train_image(image_buffer,
distorted_image = distort_color(distorted_image, batch_position,
distort_color_in_yiq=distort_color_in_yiq)

- # Note: This ensures the scaling matches the output of eval_image
+ #Note: This ensures the scaling matches the output of eval_image
distorted_image *= 255

if summary_verbosity >= 3:
@@ -487,10 +489,11 @@ class RecordInputImagePreprocessor(BaseImagePreprocessor):
"""Preprocessing image_buffer as a function of its batch position."""
if self.train:
image = train_image(image_buffer, self.height, self.width, bbox,
- batch_position, self.resize_method, self.distortions,
+ batch_position, self.resize_method, False,
None, summary_verbosity=self.summary_verbosity,
distort_color_in_yiq=self.distort_color_in_yiq,
- fuse_decode_and_crop=self.fuse_decode_and_crop)
+ #fuse_decode_and_crop=self.fuse_decode_and_crop
+ fuse_decode_and_crop=False)
else:
image = tf.image.decode_jpeg(
image_buffer, channels=3, dct_method='INTEGER_FAST')
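
The data-loader changes above come down to listing the TFRecord shards in a fixed order and removing the training-time shuffle. A minimal sketch of the same idea, assuming a placeholder glob pattern and batch size rather than the benchmark's own helper:

import tensorflow as tf

def deterministic_dataset(glob_pattern, batch_size):
    """Input pipeline that yields records in the same order on every run."""
    # Fixed shard order: shuffle=False plus an explicit seed, mirroring the
    # list_files change in the patch above.
    ds = tf.data.Dataset.list_files(glob_pattern, shuffle=False, seed=10)
    # Sequential interleave keeps the read order reproducible; the patch keeps
    # parallel_interleave, whose default (non-sloppy) mode is also deterministic.
    ds = ds.interleave(tf.data.TFRecordDataset, cycle_length=10)
    # No ds.shuffle(buffer_size=10000) here -- that is the call the patch
    # comments out for the training path.
    ds = ds.repeat()
    ds = ds.batch(batch_size)
    return ds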
38 changes: 38 additions & 0 deletions test/validate_resnet50/one_encapsulate.patch
@@ -0,0 +1,38 @@
diff --git a/scripts/tf_cnn_benchmarks/benchmark_cnn.py b/scripts/tf_cnn_benchmarks/benchmark_cnn.py
index 09b118e..d5a4e29 100644
--- a/scripts/tf_cnn_benchmarks/benchmark_cnn.py
+++ b/scripts/tf_cnn_benchmarks/benchmark_cnn.py
@@ -34,6 +34,7 @@ import numpy as np
import six
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
+import ngraph_bridge

from google.protobuf import text_format

@@ -726,13 +727,23 @@ def benchmark_one_step(sess,
summary_str = None
start_time = time.time()
if summary_op is None:
- results = sess.run(fetches, options=run_options, run_metadata=run_metadata)
+ # get a new set of fetch operation
+ new_fetches = {}
+ for f in fetches:
+ if f == "average_loss":
+ continue
+ new_fetches[f] = fetches[f]
+
+ results = sess.run(new_fetches, options=run_options, run_metadata=run_metadata)
+ #results = sess.run(fetches, options=run_options, run_metadata=run_metadata)
else:
(results, summary_str) = sess.run(
[fetches, summary_op], options=run_options, run_metadata=run_metadata)

if not params.forward_only:
- lossval = results['average_loss']
+ # the calculation is removed in the operations to be fetched
+ #lossval = results['average_loss']
+ lossval = 0
else:
lossval = 0.
if image_producer is not None:
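
The only bridge-specific line in either patch is "import ngraph_bridge": importing the module is what activates the bridge, so ops supported by nGraph are clustered into encapsulate ops with no other code changes. The rest of one_encapsulate.patch drops 'average_loss' from the fetched ops, which, going by the patch comments and the file name, keeps the fetched training step to a single encapsulated cluster. A minimal usage sketch under that assumption (the toy graph below is illustrative, not part of the benchmark):

import numpy as np
import tensorflow as tf
import ngraph_bridge  # importing the module is enough to enable the bridge

# A trivial TF 1.x graph; ops supported by nGraph are rewritten into
# encapsulate ops when the session runs.
a = tf.placeholder(tf.float32, shape=(2, 3))
b = tf.constant(np.ones((3, 2), dtype=np.float32))
c = tf.matmul(a, b)

with tf.Session() as sess:
    print(sess.run(c, feed_dict={a: np.ones((2, 3), dtype=np.float32)}))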
53 changes: 53 additions & 0 deletions (new benchmark log; file name not shown)
@@ -0,0 +1,53 @@
W0328 15:31:38.178014 140647503410944 tf_logging.py:125] From /localdisk/mingshan/benchmarks/scripts/tf_cnn_benchmarks/data_utils.py:121: parallel_interleave (from tensorflow.contrib.data.python.ops.interleave_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.data.experimental.parallel_interleave(...)`.
W0328 15:31:38.197099 140647503410944 tf_logging.py:125] From /localdisk/mingshan/benchmarks/scripts/tf_cnn_benchmarks/data_utils.py:136: map_and_batch (from tensorflow.contrib.data.python.ops.batching) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.data.experimental.map_and_batch(...)`.
W0328 15:31:41.318058 140647503410944 tf_logging.py:125] From /localdisk/mingshan/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:1842: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
2019-03-28 15:31:41.734261: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2019-03-28 15:31:42.684223: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1432] Found device 0 with properties:
name: TITAN Xp major: 6 minor: 1 memoryClockRate(GHz): 1.582
pciBusID: 0000:0c:00.0
totalMemory: 11.91GiB freeMemory: 11.75GiB
2019-03-28 15:31:42.684259: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1511] Adding visible gpu devices: 0
2019-03-28 15:31:42.996125: I tensorflow/core/common_runtime/gpu/gpu_device.cc:982] Device interconnect StreamExecutor with strength 1 edge matrix:
2019-03-28 15:31:42.996179: I tensorflow/core/common_runtime/gpu/gpu_device.cc:988] 0
2019-03-28 15:31:42.996185: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1001] 0: N
2019-03-28 15:31:42.996527: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 11364 MB memory) -> physical GPU (device: 0, name: TITAN Xp, pci bus id: 0000:0c:00.0, compute capability: 6.1)
I0328 15:31:43.550181 140647503410944 tf_logging.py:115] Running local_init_op.
I0328 15:31:49.378333 140647503410944 tf_logging.py:115] Done running local_init_op.
TensorFlow: 1.12
Model: resnet50
Dataset: imagenet
Mode: training
SingleSess: False
Batch size: 32 global
32.0 per device
Num batches: 100
Num epochs: 0.00
Devices: ['/gpu:0']
Data format: NCHW
Optimizer: sgd
Variables: parameter_server
==========
Generating model
Running warm up
Done warm up
Step Img/sec total_loss top_1_accuracy top_5_accuracy
1 images/sec: 217.4 +/- 0.0 (jitter = 0.0) 8.360 0.000 0.000
10 images/sec: 215.6 +/- 0.4 (jitter = 1.2) 8.027 0.000 0.000
20 images/sec: 215.0 +/- 0.3 (jitter = 1.0) 8.333 0.000 0.031
30 images/sec: 214.9 +/- 0.3 (jitter = 1.0) 8.215 0.000 0.000
40 images/sec: 214.8 +/- 0.2 (jitter = 1.1) 8.189 0.000 0.000
50 images/sec: 214.6 +/- 0.2 (jitter = 1.3) 8.177 0.000 0.000
60 images/sec: 214.3 +/- 0.2 (jitter = 1.4) 8.112 0.000 0.000
70 images/sec: 214.3 +/- 0.2 (jitter = 1.5) 8.185 0.000 0.000
80 images/sec: 214.2 +/- 0.2 (jitter = 1.5) 8.120 0.000 0.000
90 images/sec: 214.2 +/- 0.2 (jitter = 1.5) 8.254 0.000 0.000
100 images/sec: 214.3 +/- 0.2 (jitter = 1.6) 8.093 0.000 0.000
----------------------------------------------------------------
total images/sec: 213.97
----------------------------------------------------------------
63 changes: 63 additions & 0 deletions (new benchmark log; file name not shown)
@@ -0,0 +1,63 @@
W0328 15:32:46.495650 140049874691840 tf_logging.py:125] From /localdisk/mingshan/benchmarks/scripts/tf_cnn_benchmarks/data_utils.py:121: parallel_interleave (from tensorflow.contrib.data.python.ops.interleave_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.data.experimental.parallel_interleave(...)`.
W0328 15:32:46.520555 140049874691840 tf_logging.py:125] From /localdisk/mingshan/benchmarks/scripts/tf_cnn_benchmarks/data_utils.py:136: map_and_batch (from tensorflow.contrib.data.python.ops.batching) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.data.experimental.map_and_batch(...)`.
W0328 15:32:49.878959 140049874691840 tf_logging.py:125] From /localdisk/mingshan/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:1842: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
2019-03-28 15:32:50.317750: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2019-03-28 15:32:51.857166: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1432] Found device 0 with properties:
name: TITAN Xp major: 6 minor: 1 memoryClockRate(GHz): 1.582
pciBusID: 0000:0c:00.0
totalMemory: 11.91GiB freeMemory: 11.75GiB
2019-03-28 15:32:51.857212: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1511] Adding visible gpu devices: 0
2019-03-28 15:32:52.273544: I tensorflow/core/common_runtime/gpu/gpu_device.cc:982] Device interconnect StreamExecutor with strength 1 edge matrix:
2019-03-28 15:32:52.273603: I tensorflow/core/common_runtime/gpu/gpu_device.cc:988] 0
2019-03-28 15:32:52.273610: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1001] 0: N
2019-03-28 15:32:52.273926: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 11364 MB memory) -> physical GPU (device: 0, name: TITAN Xp, pci bus id: 0000:0c:00.0, compute capability: 6.1)
I0328 15:32:52.851403 140049874691840 tf_logging.py:115] Running local_init_op.
I0328 15:32:58.568639 140049874691840 tf_logging.py:115] Done running local_init_op.
TensorFlow: 1.12
Model: resnet50
Dataset: imagenet
Mode: training
SingleSess: False
Batch size: 32 global
32.0 per device
Num batches: 200
Num epochs: 0.00
Devices: ['/gpu:0']
Data format: NCHW
Optimizer: sgd
Variables: parameter_server
==========
Generating model
Running warm up
Done warm up
Step Img/sec total_loss top_1_accuracy top_5_accuracy
1 images/sec: 217.6 +/- 0.0 (jitter = 0.0) 8.360 0.000 0.000
10 images/sec: 216.7 +/- 0.4 (jitter = 0.3) 8.027 0.000 0.000
20 images/sec: 216.6 +/- 0.3 (jitter = 0.6) 8.333 0.000 0.031
30 images/sec: 216.3 +/- 0.3 (jitter = 1.0) 8.216 0.000 0.000
40 images/sec: 216.3 +/- 0.2 (jitter = 1.0) 8.188 0.000 0.000
50 images/sec: 216.1 +/- 0.2 (jitter = 1.2) 8.177 0.000 0.000
60 images/sec: 216.3 +/- 0.2 (jitter = 1.1) 8.107 0.000 0.000
70 images/sec: 216.4 +/- 0.2 (jitter = 1.0) 8.189 0.000 0.000
80 images/sec: 216.4 +/- 0.2 (jitter = 0.9) 8.114 0.000 0.000
90 images/sec: 216.4 +/- 0.2 (jitter = 0.8) 8.246 0.000 0.000
100 images/sec: 216.3 +/- 0.2 (jitter = 1.1) 8.081 0.000 0.000
110 images/sec: 216.0 +/- 0.2 (jitter = 1.4) 8.363 0.000 0.000
120 images/sec: 215.7 +/- 0.2 (jitter = 1.8) 8.027 0.000 0.000
130 images/sec: 215.5 +/- 0.2 (jitter = 2.1) 8.323 0.000 0.000
140 images/sec: 215.3 +/- 0.2 (jitter = 2.4) 8.440 0.000 0.000
150 images/sec: 215.2 +/- 0.2 (jitter = 2.8) 8.038 0.000 0.000
160 images/sec: 215.0 +/- 0.2 (jitter = 2.9) 8.059 0.000 0.000
170 images/sec: 214.8 +/- 0.2 (jitter = 3.1) 8.318 0.000 0.000
180 images/sec: 214.7 +/- 0.2 (jitter = 3.0) 8.113 0.000 0.000
190 images/sec: 214.6 +/- 0.2 (jitter = 3.0) 8.010 0.031 0.062
200 images/sec: 214.5 +/- 0.2 (jitter = 2.9) 8.379 0.000 0.000
----------------------------------------------------------------
total images/sec: 214.32
----------------------------------------------------------------
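
The two imagenet runs above (100 and 200 batches) report near-identical per-step losses over the overlapping steps, which is what the deterministic-loader patch is meant to achieve. A small script along these lines could compare the logs automatically; the file names and the 0.05 tolerance are assumptions, not part of this PR:

import re

# Log lines look like:
#   "10  images/sec: 215.6 +/- 0.4 (jitter = 1.2)  8.027  0.000  0.000"
# The step number comes first; total_loss is the first number after the
# jitter group.
STEP_RE = re.compile(r"^\s*(\d+)\s+images/sec:.*\(jitter = [\d.]+\)\s+([\d.]+)")

def step_losses(path):
    """Return {step: total_loss} parsed from a tf_cnn_benchmarks log."""
    losses = {}
    with open(path) as f:
        for line in f:
            m = STEP_RE.match(line)
            if m:
                losses[int(m.group(1))] = float(m.group(2))
    return losses

# Hypothetical file names for the two runs shown above.
run_a = step_losses("resnet50_100_batches.log")
run_b = step_losses("resnet50_200_batches.log")

for step in sorted(set(run_a) & set(run_b)):
    diff = abs(run_a[step] - run_b[step])
    flag = "" if diff < 0.05 else "  <-- diverged"
    print("step %3d  %.3f  %.3f  diff=%.3f%s"
          % (step, run_a[step], run_b[step], diff, flag))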
57 changes: 57 additions & 0 deletions (new benchmark log; file name not shown)
@@ -0,0 +1,57 @@
W0328 16:15:34.093370 140329842439936 tf_logging.py:125] From /localdisk/mingshan/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:1842: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
2019-03-28 16:15:34.516200: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2019-03-28 16:15:36.297633: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1432] Found device 0 with properties:
name: TITAN Xp major: 6 minor: 1 memoryClockRate(GHz): 1.582
pciBusID: 0000:0c:00.0
totalMemory: 11.91GiB freeMemory: 11.75GiB
2019-03-28 16:15:36.297676: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1511] Adding visible gpu devices: 0
2019-03-28 16:15:36.626773: I tensorflow/core/common_runtime/gpu/gpu_device.cc:982] Device interconnect StreamExecutor with strength 1 edge matrix:
2019-03-28 16:15:36.626811: I tensorflow/core/common_runtime/gpu/gpu_device.cc:988] 0
2019-03-28 16:15:36.626818: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1001] 0: N
2019-03-28 16:15:36.627131: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 11364 MB memory) -> physical GPU (device: 0, name: TITAN Xp, pci bus id: 0000:0c:00.0, compute capability: 6.1)
I0328 16:15:37.193925 140329842439936 tf_logging.py:115] Running local_init_op.
I0328 16:15:37.224961 140329842439936 tf_logging.py:115] Done running local_init_op.
TensorFlow: 1.12
Model: resnet50
Dataset: imagenet (synthetic)
Mode: training
SingleSess: False
Batch size: 32 global
32.0 per device
Num batches: 200
Num epochs: 0.00
Devices: ['/gpu:0']
Data format: NCHW
Optimizer: sgd
Variables: parameter_server
==========
Generating model
Running warm up
Done warm up
Step Img/sec total_loss top_1_accuracy top_5_accuracy
1 images/sec: 219.2 +/- 0.0 (jitter = 0.0) 8.229 0.000 0.000
10 images/sec: 218.4 +/- 0.3 (jitter = 1.1) 8.305 0.000 0.000
20 images/sec: 218.8 +/- 0.3 (jitter = 1.2) 7.921 0.000 0.000
30 images/sec: 218.7 +/- 0.3 (jitter = 1.4) 8.055 0.000 0.000
40 images/sec: 218.0 +/- 0.3 (jitter = 2.3) 8.293 0.000 0.000
50 images/sec: 217.9 +/- 0.3 (jitter = 1.9) 8.092 0.000 0.000
60 images/sec: 218.0 +/- 0.2 (jitter = 1.8) 8.082 0.000 0.000
70 images/sec: 218.1 +/- 0.2 (jitter = 1.7) 8.270 0.000 0.000
80 images/sec: 218.2 +/- 0.2 (jitter = 1.5) 8.177 0.000 0.000
90 images/sec: 218.3 +/- 0.2 (jitter = 1.5) 7.983 0.031 0.031
100 images/sec: 218.3 +/- 0.2 (jitter = 1.5) 8.488 0.000 0.000
110 images/sec: 218.1 +/- 0.1 (jitter = 1.6) 8.207 0.000 0.000
120 images/sec: 218.1 +/- 0.1 (jitter = 1.5) 7.931 0.000 0.000
130 images/sec: 218.0 +/- 0.1 (jitter = 1.5) 8.370 0.000 0.000
140 images/sec: 218.0 +/- 0.1 (jitter = 1.3) 8.345 0.000 0.000
150 images/sec: 217.9 +/- 0.1 (jitter = 1.2) 8.192 0.000 0.031
160 images/sec: 217.9 +/- 0.1 (jitter = 1.2) 8.313 0.031 0.031
170 images/sec: 217.9 +/- 0.1 (jitter = 1.1) 8.381 0.000 0.000
180 images/sec: 217.9 +/- 0.1 (jitter = 1.0) 8.061 0.031 0.031
190 images/sec: 217.8 +/- 0.1 (jitter = 1.0) 8.239 0.000 0.031
200 images/sec: 217.8 +/- 0.1 (jitter = 1.0) 8.045 0.000 0.000
----------------------------------------------------------------
total images/sec: 217.64
----------------------------------------------------------------