diff --git a/examples/common/half.cpp b/examples/common/half.cpp new file mode 100644 index 0000000..63b25ab --- /dev/null +++ b/examples/common/half.cpp @@ -0,0 +1,70 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2016, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. +// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. +// +//////////////////////////////////////////////////////////////////////////////// + +// Naive implementation of float <-> half conversion. +// inf, nan and denorms are not supported + +#include "half.h" + +uint16_t f2h(float f) // returns the binary16 bit pattern for f; the mantissa is truncated, not rounded +{ + if (f == 0.0f) // positive and negative zero both map to half bits 0 + return 0; + + uint32_t t = reinterpret_cast<uint32_t&>(f); // raw binary32 bits of f + t = ((t>>16) & 0x8000) | ((t>>13) & 0x03ff) | ((((t&0x7f800000)-0x38000000)>>13) & 0x7c00); // sign | top 10 mantissa bits | exponent rebiased from 127 to 15 (0x38000000 is 112 << 23) + + return t & 0xffff; +} + +float h2f(uint16_t h) // expands the binary16 bit pattern h to a float +{ + if (!(h & 0x7fff)) // half zero of either sign maps to 0.0f + return 0.0f; + float f; + uint32_t sign = h & 0x8000; + uint32_t t = h; + t = ((t & 0x7c00) + 0x1c000) | (t & 0x3ff); // exponent rebiased from 15 to 127 (0x1c000 is 112 << 10), mantissa kept + reinterpret_cast<uint32_t&>(f) = sign << 16 | t << 13; // write the assembled binary32 bits into f + return f; +} + diff --git a/examples/common/half.h b/examples/common/half.h new file mode 100644 index 0000000..119c4db --- /dev/null +++ b/examples/common/half.h @@ -0,0 +1,51 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2016, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. 
+// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. 
+// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef HALF_H__ +#define HALF_H__ + +#include "stdint.h" + +uint16_t f2h(float f); // float -> half (binary16) bit pattern; inf, nan and denorms are not supported +float h2f(uint16_t h); // half (binary16) bit pattern -> float; inf, nan and denorms are not supported + +#endif // HALF_H__ diff --git a/examples/gfx8/CMakeLists.txt b/examples/gfx8/CMakeLists.txt index ece0224..fcca6f5 100644 --- a/examples/gfx8/CMakeLists.txt +++ b/examples/gfx8/CMakeLists.txt @@ -43,4 +43,6 @@ asm_dispatch_example(ds_bpermute) asm_dispatch_example(dpp_reduce) asm_dispatch_example(s_memrealtime) +asm_dispatch_example(fp16_storage) +asm_dispatch_example(fp16_native) inline_asm_dispatch_example(s_memrealtime_inline) diff --git a/examples/gfx8/fp16_native.cpp b/examples/gfx8/fp16_native.cpp new file mode 100644 index 0000000..1ddfeec --- /dev/null +++ b/examples/gfx8/fp16_native.cpp @@ -0,0 +1,102 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2016, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. +// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. 
+// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. +// +//////////////////////////////////////////////////////////////////////////////// + +#include "dispatch.hpp" +#include "half.h" + +using namespace amd::dispatch; + +class HalfVectorAdd : public Dispatch { // host driver: adds two half-precision vectors on the device and checks the result +private: + Buffer* in1; + Buffer* in2; + Buffer* out; + unsigned length; // number of half elements in each vector + +public: + HalfVectorAdd(int argc, const char **argv) + : Dispatch(argc, argv), length(64) { } + + bool SetupCodeObject() override { + return LoadCodeObjectFromFile("fp16_native.co"); + } + + bool Setup() override { + if (!AllocateKernarg(3 * sizeof(Buffer*))) { return false; } // three pointer arguments: in1, in2, out + in1 = AllocateBuffer(length * sizeof(uint16_t)); // one 16-bit half per element + in2 = AllocateBuffer(length * sizeof(uint16_t)); + for (unsigned i = 0; i < length; ++i) { + in1->Data<uint16_t>(i) = f2h(i); + in2->Data<uint16_t>(i) = f2h(i * 1.25f); + } + if (!CopyTo(in1)) { output << "Error: failed to copy to local" << std::endl; return false; } + if (!CopyTo(in2)) { output << "Error: failed to copy to local" << std::endl; return false; } + out = AllocateBuffer(length * sizeof(uint16_t)); + Kernarg(in1); + Kernarg(in2); + Kernarg(out); + 
SetGridSize(length); // one work-item per element + SetWorkgroupSize(64); + return true; + } + + bool Verify() override { + if (!CopyFrom(out)) { output << "Error: failed to copy from local" << std::endl; return false; } + bool ok = true; + for (unsigned i = 0; i < length; ++i) { + float f1 = h2f(in1->Data<uint16_t>(i)); + float f2 = h2f(in2->Data<uint16_t>(i)); + float res = h2f(out->Data<uint16_t>(i)); + float expected = h2f(f2h(f1 + f2)); // host reference: float sum narrowed to half and widened back + if (expected != res){ + output << "Error: validation failed at " << i << ": got " << res << " expected " << expected << std::endl; + ok = false; + } + } + return ok; + } +}; + +int main(int argc, const char** argv) +{ + return HalfVectorAdd(argc, argv).RunMain(); +} diff --git a/examples/gfx8/fp16_native.s b/examples/gfx8/fp16_native.s new file mode 100644 index 0000000..9b9db04 --- /dev/null +++ b/examples/gfx8/fp16_native.s @@ -0,0 +1,100 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2016, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. +// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. 
+// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. +// +//////////////////////////////////////////////////////////////////////////////// + +// +// Vector add example using fp16 storage data type and fp16 add instruction +// + +.hsa_code_object_version 2,0 +.hsa_code_object_isa 8, 0, 3, "AMD", "AMDGPU" + +.text +.p2align 8 +.amdgpu_hsa_kernel hello_world + +hello_world: + + .amd_kernel_code_t + enable_sgpr_kernarg_segment_ptr = 1 + is_ptr64 = 1 + compute_pgm_rsrc1_vgprs = 0 + compute_pgm_rsrc1_sgprs = 0 + compute_pgm_rsrc2_user_sgpr = 2 + kernarg_segment_byte_size = 24 // three 8-byte pointers: in1, in2, out + wavefront_sgpr_count = 8 + workitem_vgpr_count = 4 + .end_amd_kernel_code_t + + // read kernel arguments: + // s[0:1] = half *in1 + // s[2:3] = half *in2 + // s[4:5] = half *out + s_load_dwordx2 s[4:5], s[0:1], 0x10 // out pointer, kernarg byte offset 16 + s_load_dwordx4 s[0:3], s[0:1], 0x00 // issued last: it overwrites the kernarg base held in s[0:1] + + v_lshlrev_b32 v0, 1, v0 // v0 = i * 2, the byte offset of 16-bit element i + s_waitcnt 0 // scalar loads must complete before s[0:5] are read + + // v[1:2] = &in1[i] + v_add_u32 v1, vcc, s0, v0 // low dword, carry out in vcc + v_mov_b32 v2, s1 + v_addc_u32 v2, vcc, v2, 0, vcc // high dword plus carry + flat_load_ushort v3, v[1:2] // v3 = in1[i] + + // v[1:2] = &in2[i] + v_add_u32 v1, vcc, s2, v0 + v_mov_b32 v2, s3 + v_addc_u32 v2, vcc, 
v2, 0, vcc + flat_load_ushort v2, v[1:2] // v2 = in2[i] + + // v[0:1] = &out[i] + v_add_u32 v0, vcc, s4, v0 + v_mov_b32 v1, s5 + v_addc_u32 v1, vcc, v1, 0, vcc + + // wait for memory operations to complete + s_waitcnt 0 + + v_add_f16 v3, v3, v2 // v3 = in1[i] + in2[i] + + flat_store_short v[0:1], v3 // out[i] = v3 + s_endpgm diff --git a/examples/gfx8/fp16_storage.cpp b/examples/gfx8/fp16_storage.cpp new file mode 100644 index 0000000..92af6d7 --- /dev/null +++ b/examples/gfx8/fp16_storage.cpp @@ -0,0 +1,102 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2016, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. +// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. +// +//////////////////////////////////////////////////////////////////////////////// + +#include "dispatch.hpp" +#include "half.h" + +using namespace amd::dispatch; + +class HalfVectorAdd : public Dispatch { // host driver: adds two half-precision vectors on the device and checks the result +private: + Buffer* in1; + Buffer* in2; + Buffer* out; + unsigned length; // number of half elements in each vector + +public: + HalfVectorAdd(int argc, const char **argv) + : Dispatch(argc, argv), length(64) { } + + bool SetupCodeObject() override { + return LoadCodeObjectFromFile("fp16_storage.co"); + } + + bool Setup() override { + if (!AllocateKernarg(3 * sizeof(Buffer*))) { return false; } // three pointer arguments: in1, in2, out + in1 = AllocateBuffer(length * sizeof(uint16_t)); // one 16-bit half per element + in2 = AllocateBuffer(length * sizeof(uint16_t)); + for (unsigned i = 0; i < length; ++i) { + in1->Data<uint16_t>(i) = f2h(i); + in2->Data<uint16_t>(i) = f2h(i * 1.25f); + } + if (!CopyTo(in1)) { output << "Error: failed to copy to local" << std::endl; return false; } + if (!CopyTo(in2)) { output << "Error: failed to copy to local" << std::endl; return false; } + out = AllocateBuffer(length * sizeof(uint16_t)); + Kernarg(in1); + Kernarg(in2); + Kernarg(out); + SetGridSize(length); // one work-item per element + SetWorkgroupSize(64); + return true; + } + + bool Verify() override { + if (!CopyFrom(out)) { output << "Error: failed to copy from local" << std::endl; return false; } + bool ok = true; + for (unsigned i = 0; i < length; ++i) { + float f1 = h2f(in1->Data<uint16_t>(i)); + float f2 = h2f(in2->Data<uint16_t>(i)); + float res = h2f(out->Data<uint16_t>(i)); + float expected = h2f(f2h(f1 + f2)); // host reference: float sum narrowed to half and widened back + if (expected != res){ + output << "Error: 
validation failed at " << i << ": got " << res << " expected " << expected << std::endl; + ok = false; + } + } + return ok; + } +}; + +int main(int argc, const char** argv) +{ + return HalfVectorAdd(argc, argv).RunMain(); +} diff --git a/examples/gfx8/fp16_storage.s b/examples/gfx8/fp16_storage.s new file mode 100644 index 0000000..83363aa --- /dev/null +++ b/examples/gfx8/fp16_storage.s @@ -0,0 +1,107 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2016, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. +// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. +// +//////////////////////////////////////////////////////////////////////////////// + +// +// Vector add example using fp16 storage data type and fp32 add instruction +// + +.hsa_code_object_version 2,0 +.hsa_code_object_isa 8, 0, 3, "AMD", "AMDGPU" + +.text +.p2align 8 +.amdgpu_hsa_kernel hello_world + +hello_world: + + .amd_kernel_code_t + enable_sgpr_kernarg_segment_ptr = 1 + is_ptr64 = 1 + compute_pgm_rsrc1_vgprs = 0 + compute_pgm_rsrc1_sgprs = 0 + compute_pgm_rsrc2_user_sgpr = 2 + kernarg_segment_byte_size = 24 // three 8-byte pointers: in1, in2, out + wavefront_sgpr_count = 8 + workitem_vgpr_count = 4 + .end_amd_kernel_code_t + + // read kernel arguments: + // s[0:1] = half *in1 + // s[2:3] = half *in2 + // s[4:5] = half *out + s_load_dwordx2 s[4:5], s[0:1], 0x10 // out pointer, kernarg byte offset 16 + s_load_dwordx4 s[0:3], s[0:1], 0x00 // issued last: it overwrites the kernarg base held in s[0:1] + + v_lshlrev_b32 v0, 1, v0 // v0 = i * 2, the byte offset of 16-bit element i + s_waitcnt 0 // scalar loads must complete before s[0:5] are read + + // v[1:2] = &in1[i] + v_add_u32 v1, vcc, s0, v0 // low dword, carry out in vcc + v_mov_b32 v2, s1 + v_addc_u32 v2, vcc, v2, 0, vcc // high dword plus carry + flat_load_ushort v3, v[1:2] // v3 = in1[i] + + // v[1:2] = &in2[i] + v_add_u32 v1, vcc, s2, v0 + v_mov_b32 v2, s3 + v_addc_u32 v2, vcc, v2, 0, vcc + flat_load_ushort v2, v[1:2] // v2 = in2[i] + + // v[0:1] = &out[i] + v_add_u32 v0, vcc, s4, v0 + v_mov_b32 v1, s5 + v_addc_u32 v1, vcc, v1, 0, vcc + + // wait for memory operations to complete + s_waitcnt 0 + + // convert input data to f32 + v_cvt_f32_f16 v3, v3 + v_cvt_f32_f16 v2, v2 + + v_add_f32 v3, v3, v2 // v3 = in1[i] + in2[i] + + // convert result back to f16 + v_cvt_f16_f32 v3, v3 + + flat_store_short v[0:1], v3 
// out[i] = v3 + s_endpgm