Auto-merge updates from auto-update branch

mlcommons · Dec 25, 2024 · 8f142b6 · 8f142b6
2 parents 66cb177 + 44ec4c9
commit 8f142b6
Show file tree

Hide file tree

Showing 52 changed files with 1,136 additions and 1,136 deletions.
diff --git a/closed/MLCommons/systems/RTX4090x1-nvidia_original-gpu-tensorrt-vdefault-default_config.json b/closed/MLCommons/systems/RTX4090x1-nvidia_original-gpu-tensorrt-vdefault-default_config.json
@@ -1,9 +1,9 @@
 {
-  "accelerator_frequency": "2610000 MHz",
+  "accelerator_frequency": "2520000 MHz",
   "accelerator_host_interconnect": "N/A",
   "accelerator_interconnect": "N/A",
   "accelerator_interconnect_topology": "",
-  "accelerator_memory_capacity": "23.54595947265625 GB",
+  "accelerator_memory_capacity": "23.64971923828125 GB",
   "accelerator_memory_configuration": "N/A",
   "accelerator_model_name": "NVIDIA GeForce RTX 4090",
   "accelerator_on-chip_memories": "",
@@ -16,17 +16,17 @@
   "host_network_card_count": "1",
   "host_networking": "Gig Ethernet",
   "host_networking_topology": "N/A",
-  "host_processor_caches": "L1d cache: 512 KiB, L1i cache: 512 KiB, L2 cache: 16 MiB, L3 cache: 64 MiB",
-  "host_processor_core_count": "16",
-  "host_processor_frequency": "5881.0000",
+  "host_processor_caches": "L1d cache: 576 KiB, L1i cache: 384 KiB, L2 cache: 24 MiB, L3 cache:  ",
+  "host_processor_core_count": "24",
+  "host_processor_frequency": "5800.0000",
   "host_processor_interconnect": "",
-  "host_processor_model_name": "AMD Ryzen 9 7950X 16-Core Processor",
+  "host_processor_model_name": "13th Gen Intel(R) Core(TM) i9-13900K",
   "host_processors_per_node": "1",
-  "host_storage_capacity": "6.8T",
+  "host_storage_capacity": "9.4T",
   "host_storage_type": "SSD",
   "hw_notes": "",
   "number_of_nodes": "1",
-  "operating_system": "Ubuntu 20.04 (linux-6.8.0-51-generic-glibc2.31)",
+  "operating_system": "Ubuntu 20.04 (linux-6.8.0-49-generic-glibc2.31)",
   "other_software_stack": "Python: 3.8.10, GCC-9.4.0, Using Docker  , CUDA 12.2",
   "status": "available",
   "submitter": "MLCommons",

diff --git a/...inal-gpu-tensorrt-vdefault-default_config/stable-diffusion-xl/offline/README.md b/...inal-gpu-tensorrt-vdefault-default_config/stable-diffusion-xl/offline/README.md
@@ -19,7 +19,7 @@ pip install -U cmind
 
 cm rm cache -f
 
-cm pull repo mlcommons@mlperf-automations --checkout=a9e8329cf5f036aea3c491f0a375cce2d89b5cd1
+cm pull repo mlcommons@mlperf-automations --checkout=3551660b68ffcff303ae7539ae9a62d34b19bc7e
 
 cm run script \
 	--tags=app,mlperf,inference,generic,_nvidia,_sdxl,_tensorrt,_cuda,_valid,_r4.1-dev_default,_offline \
@@ -71,7 +71,7 @@ cm run script \
 	--env.CM_DOCKER_REUSE_EXISTING_CONTAINER=yes \
 	--env.CM_DOCKER_DETACHED_MODE=yes \
 	--env.CM_MLPERF_INFERENCE_RESULTS_DIR_=/home/arjun/gh_action_results/valid_results \
-	--env.CM_DOCKER_CONTAINER_ID=86b28fb508d5 \
+	--env.CM_DOCKER_CONTAINER_ID=ec262bdd2e32 \
 	--env.CM_MLPERF_LOADGEN_COMPLIANCE_TEST=TEST04 \
 	--add_deps_recursive.compiler.tags=gcc \
 	--add_deps_recursive.coco2014-original.tags=_full \
@@ -129,4 +129,4 @@ Model Precision: int8
 ### Accuracy Results 
 
 ### Performance Results 
-`Samples per second`: `0.698813`
+`Samples per second`: `0.696804`
diff --git a/...nal-gpu-tensorrt-vdefault-default_config/stable-diffusion-xl/offline/accuracy_console.out b/...nal-gpu-tensorrt-vdefault-default_config/stable-diffusion-xl/offline/accuracy_console.out
@@ -1,30 +1,30 @@
-[2024-12-23 07:17:26,991 main.py:229 INFO] Detected system ID: KnownSystem.RTX4090x1
+[2024-12-25 07:19:30,599 main.py:229 INFO] Detected system ID: KnownSystem.RTX4090x1
 /home/cmuser/.local/lib/python3.8/site-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
   warnings.warn(_BETA_TRANSFORMS_WARNING)
 /home/cmuser/.local/lib/python3.8/site-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
   warnings.warn(_BETA_TRANSFORMS_WARNING)
-[2024-12-23 07:17:28,002 generate_conf_files.py:107 INFO] Generated measurements/ entries for RTX4090x1_TRT/stable-diffusion-xl/Offline
-[2024-12-23 07:17:28,002 __init__.py:46 INFO] Running command: python3 -m code.stable-diffusion-xl.tensorrt.harness --logfile_outdir="/cm-mount/home/arjun/gh_action_results/valid_results/RTX4090x1-nvidia_original-gpu-tensorrt-vdefault-default_config/stable-diffusion-xl/offline/accuracy" --logfile_prefix="mlperf_log_" --performance_sample_count=5000 --test_mode="AccuracyOnly" --gpu_batch_size=2 --mlperf_conf_path="/home/cmuser/CM/repos/local/cache/c1d8c371d52d46a3/inference/mlperf.conf" --tensor_path="build/preprocessed_data/coco2014-tokenized-sdxl/5k_dataset_final/" --use_graphs=true --user_conf_path="/home/cmuser/CM/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/c2f53de4b9fb46138e1e0dad22533e07.conf" --gpu_inference_streams=1 --gpu_copy_streams=1 --gpu_engines="./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIP-Offline-gpu-b2-fp16.custom_k_99_MaxP.plan,./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIPWithProj-Offline-gpu-b2-fp16.custom_k_99_MaxP.plan,./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-UNetXL-Offline-gpu-b2-int8.custom_k_99_MaxP.plan,./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-VAE-Offline-gpu-b2-fp32.custom_k_99_MaxP.plan" --scenario Offline --model stable-diffusion-xl
-[2024-12-23 07:17:28,002 __init__.py:53 INFO] Overriding Environment
+[2024-12-25 07:19:31,615 generate_conf_files.py:107 INFO] Generated measurements/ entries for RTX4090x1_TRT/stable-diffusion-xl/Offline
+[2024-12-25 07:19:31,615 __init__.py:46 INFO] Running command: python3 -m code.stable-diffusion-xl.tensorrt.harness --logfile_outdir="/cm-mount/home/arjun/gh_action_results/valid_results/RTX4090x1-nvidia_original-gpu-tensorrt-vdefault-default_config/stable-diffusion-xl/offline/accuracy" --logfile_prefix="mlperf_log_" --performance_sample_count=5000 --test_mode="AccuracyOnly" --gpu_batch_size=2 --mlperf_conf_path="/home/cmuser/CM/repos/local/cache/c1d8c371d52d46a3/inference/mlperf.conf" --tensor_path="build/preprocessed_data/coco2014-tokenized-sdxl/5k_dataset_final/" --use_graphs=true --user_conf_path="/home/cmuser/CM/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/98be8802ab644942b73ab8c1d44af551.conf" --gpu_inference_streams=1 --gpu_copy_streams=1 --gpu_engines="./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIP-Offline-gpu-b2-fp16.custom_k_99_MaxP.plan,./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIPWithProj-Offline-gpu-b2-fp16.custom_k_99_MaxP.plan,./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-UNetXL-Offline-gpu-b2-int8.custom_k_99_MaxP.plan,./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-VAE-Offline-gpu-b2-fp32.custom_k_99_MaxP.plan" --scenario Offline --model stable-diffusion-xl
+[2024-12-25 07:19:31,615 __init__.py:53 INFO] Overriding Environment
 /home/cmuser/.local/lib/python3.8/site-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
   warnings.warn(_BETA_TRANSFORMS_WARNING)
 /home/cmuser/.local/lib/python3.8/site-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
   warnings.warn(_BETA_TRANSFORMS_WARNING)
-[2024-12-23 07:17:29,411 backend.py:71 INFO] Loading TensorRT engine: ./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIP-Offline-gpu-b2-fp16.custom_k_99_MaxP.plan.
-[2024-12-23 07:17:29,513 backend.py:71 INFO] Loading TensorRT engine: ./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIPWithProj-Offline-gpu-b2-fp16.custom_k_99_MaxP.plan.
-[2024-12-23 07:17:30,016 backend.py:71 INFO] Loading TensorRT engine: ./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-UNetXL-Offline-gpu-b2-int8.custom_k_99_MaxP.plan.
-[2024-12-23 07:17:31,058 backend.py:71 INFO] Loading TensorRT engine: ./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-VAE-Offline-gpu-b2-fp32.custom_k_99_MaxP.plan.
-[2024-12-23 07:17:32,022 backend.py:96 INFO] Enabling cuda graphs for unet
-[2024-12-23 07:17:32,235 backend.py:154 INFO] captured graph for BS=1
-[2024-12-23 07:17:32,488 backend.py:154 INFO] captured graph for BS=2
-[2024-12-23 07:17:32,488 harness.py:207 INFO] Start Warm Up!
-[2024-12-23 07:17:38,305 harness.py:209 INFO] Warm Up Done!
-[2024-12-23 07:17:38,305 harness.py:211 INFO] Start Test!
-[2024-12-23 09:16:56,506 backend.py:801 INFO] [Server] Received 5000 total samples
-[2024-12-23 09:16:56,507 backend.py:809 INFO] [Device 0] Reported 5000 samples
-[2024-12-23 09:16:56,507 harness.py:214 INFO] Test Done!
-[2024-12-23 09:16:56,507 harness.py:216 INFO] Destroying SUT...
-[2024-12-23 09:16:56,507 harness.py:219 INFO] Destroying QSL...
+[2024-12-25 07:19:33,033 backend.py:71 INFO] Loading TensorRT engine: ./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIP-Offline-gpu-b2-fp16.custom_k_99_MaxP.plan.
+[2024-12-25 07:19:33,133 backend.py:71 INFO] Loading TensorRT engine: ./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIPWithProj-Offline-gpu-b2-fp16.custom_k_99_MaxP.plan.
+[2024-12-25 07:19:33,663 backend.py:71 INFO] Loading TensorRT engine: ./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-UNetXL-Offline-gpu-b2-int8.custom_k_99_MaxP.plan.
+[2024-12-25 07:19:34,699 backend.py:71 INFO] Loading TensorRT engine: ./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-VAE-Offline-gpu-b2-fp32.custom_k_99_MaxP.plan.
+[2024-12-25 07:19:35,660 backend.py:96 INFO] Enabling cuda graphs for unet
+[2024-12-25 07:19:35,869 backend.py:154 INFO] captured graph for BS=1
+[2024-12-25 07:19:36,122 backend.py:154 INFO] captured graph for BS=2
+[2024-12-25 07:19:36,123 harness.py:207 INFO] Start Warm Up!
+[2024-12-25 07:19:41,960 harness.py:209 INFO] Warm Up Done!
+[2024-12-25 07:19:41,960 harness.py:211 INFO] Start Test!
+[2024-12-25 09:19:15,805 backend.py:801 INFO] [Server] Received 5000 total samples
+[2024-12-25 09:19:15,805 backend.py:809 INFO] [Device 0] Reported 5000 samples
+[2024-12-25 09:19:15,806 harness.py:214 INFO] Test Done!
+[2024-12-25 09:19:15,806 harness.py:216 INFO] Destroying SUT...
+[2024-12-25 09:19:15,806 harness.py:219 INFO] Destroying QSL...
 benchmark : Benchmark.SDXL
 buffer_manager_thread_count : 0
 data_dir : /home/cmuser/CM/repos/local/cache/5b2b0cc913a4453a/data
@@ -33,7 +33,7 @@ gpu_copy_streams : 1
 gpu_inference_streams : 1
 input_dtype : int32
 input_format : linear
-log_dir : /home/cmuser/CM/repos/local/cache/dfbf240f980947f5/repo/closed/NVIDIA/build/logs/2024.12.23-07.17.26
+log_dir : /home/cmuser/CM/repos/local/cache/dfbf240f980947f5/repo/closed/NVIDIA/build/logs/2024.12.25-07.19.29
 mlperf_conf_path : /home/cmuser/CM/repos/local/cache/c1d8c371d52d46a3/inference/mlperf.conf
 model_path : /home/cmuser/CM/repos/local/cache/5b2b0cc913a4453a/models/SDXL/
 offline_expected_qps : 0.0
@@ -44,7 +44,7 @@ system : SystemConfiguration(host_cpu_conf=CPUConfiguration(layout={CPU(name='13
 tensor_path : build/preprocessed_data/coco2014-tokenized-sdxl/5k_dataset_final/
 test_mode : AccuracyOnly
 use_graphs : True
-user_conf_path : /home/cmuser/CM/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/c2f53de4b9fb46138e1e0dad22533e07.conf
+user_conf_path : /home/cmuser/CM/repos/mlcommons@mlperf-automations/script/generate-mlperf-inference-user-conf/tmp/98be8802ab644942b73ab8c1d44af551.conf
 system_id : RTX4090x1
 config_name : RTX4090x1_stable-diffusion-xl_Offline
 workload_setting : WorkloadSetting(HarnessType.Custom, AccuracyTarget.k_99, PowerSetting.MaxP)
@@ -60,7 +60,7 @@ cpu_freq : None
 [I] Loading bytes from ./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-CLIPWithProj-Offline-gpu-b2-fp16.custom_k_99_MaxP.plan
 [I] Loading bytes from ./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-UNetXL-Offline-gpu-b2-int8.custom_k_99_MaxP.plan
 [I] Loading bytes from ./build/engines/RTX4090x1/stable-diffusion-xl/Offline/stable-diffusion-xl-VAE-Offline-gpu-b2-fp32.custom_k_99_MaxP.plan
-[2024-12-23 09:16:56,803 run_harness.py:166 INFO] Result: Accuracy run detected.
+[2024-12-25 09:19:16,091 run_harness.py:166 INFO] Result: Accuracy run detected.
 
 ======================== Result summaries: ========================