diff --git a/examples/CMakeLists.txt.in b/examples/CMakeLists.txt.in index b455165c..57cb65dd 100644 --- a/examples/CMakeLists.txt.in +++ b/examples/CMakeLists.txt.in @@ -180,6 +180,7 @@ endif( ) find_package( Bolt REQUIRED ) add_subdirectory( DeviceVector ) +add_subdirectory( RadixSort ) add_subdirectory( Scan ) add_subdirectory( Sort ) add_subdirectory( StdDev ) diff --git a/examples/RadixSort/CMakeLists.txt b/examples/RadixSort/CMakeLists.txt new file mode 100644 index 00000000..9a6a2512 --- /dev/null +++ b/examples/RadixSort/CMakeLists.txt @@ -0,0 +1,47 @@ +############################################################################ +# Copyright 2012 - 2013 Advanced Micro Devices, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +############################################################################ + +# List the names of common files to compile across all platforms +set( clBolt.Example.RadixSort.Source RadixSort.cpp ) +set( clBolt.Example.RadixSort.Headers ${BOLT_INCLUDE_DIRS}/bolt/cl/count.h ${BOLT_INCLUDE_DIRS}/bolt/cl/detail/count.inl + ${BOLT_INCLUDE_DIRS}/bolt/cl/device_vector.h + ${BOLT_INCLUDE_DIRS}/bolt/cl/scan.h ${BOLT_INCLUDE_DIRS}/bolt/cl/detail/scan.inl + ${BOLT_INCLUDE_DIRS}/bolt/cl/scatter.h ${BOLT_INCLUDE_DIRS}/bolt/cl/detail/scatter.inl + ${BOLT_INCLUDE_DIRS}/bolt/cl/transform.h ${BOLT_INCLUDE_DIRS}/bolt/cl/detail/transform.inl + ${BOLT_INCLUDE_DIRS}/bolt/statisticalTimer.h ) + +set( clBolt.Example.RadixSort.Files ${clBolt.Example.RadixSort.Source} ${clBolt.Example.RadixSort.Headers} ) + +# Include headers files +include_directories( ${BOLT_INCLUDE_DIRS} ${OPENCL_INCLUDE_DIRS} ) + +add_executable( clBolt.Example.RadixSort ${clBolt.Example.RadixSort.Files} ) +if(BUILD_TBB) + target_link_libraries( clBolt.Example.RadixSort ${BOLT_LIBRARIES} ${OPENCL_LIBRARIES} ${GTEST_LIBRARIES} ${Boost_LIBRARIES} ${TBB_LIBRARIES} ) +else (BUILD_TBB) + target_link_libraries( clBolt.Example.RadixSort ${BOLT_LIBRARIES} ${OPENCL_LIBRARIES} ${GTEST_LIBRARIES} ${Boost_LIBRARIES} ) +endif() + +set_target_properties( clBolt.Example.RadixSort PROPERTIES VERSION ${Examples_VERSION} ) +set_target_properties( clBolt.Example.RadixSort PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" ) + +# CPack configuration; include the executable into the package +install( TARGETS clBolt.Example.RadixSort + RUNTIME DESTINATION ${BIN_DIR} + LIBRARY DESTINATION ${LIB_DIR} + ARCHIVE DESTINATION ${LIB_DIR} + ) diff --git a/examples/RadixSort/RadixSort.cpp b/examples/RadixSort/RadixSort.cpp new file mode 100644 index 00000000..fbd2f144 --- /dev/null +++ b/examples/RadixSort/RadixSort.cpp @@ -0,0 +1,187 @@ +/*************************************************************************** +* Copyright 2012 - 2013 Advanced Micro Devices, Inc. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. + +***************************************************************************/ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define BITS_IN_UNSIGEND (8*sizeof(unsigned)) + +template +void CheckAscending(T &input, size_t length) +{ + size_t index; + for( index = 0; index < input.size( ) -1; ++index ) + { + if(input[index] <= input[index+1]) + continue; + else + break; + } + if(index == (length-1)) + { + std::cout << "PASSED....\n"; + } + else + { + std::cout << "FAILED....\n"; + } +} + +// Serial version of Radix Sort +void SerialRadixSort (const std::vector &input, size_t length, std::vector &answerSerial) +{ + std::vector vInput(input.begin(), input.end()); + std::vector vBuffer0(length); + std::vector vBuffer1(length); + + for (int iter = 0; iter < BITS_IN_UNSIGEND; iter++) + { + int iOffsetB0 = 0; + int iOffsetB1 = 0; + int i = 0; + + // Separate 0s and 1s to separate buffers + for (i = 0; i < length; i++) + { + if ((vInput[i] & (1<::iterator vInputNext = std::copy(vBuffer0.begin(), vBuffer0.begin() + iOffsetB0, vInput.begin()); + // Copy 1s to after 0s we just copied + std::copy(vBuffer1.begin(), vBuffer1.begin() + iOffsetB1, vInputNext); + } + + // Copy the answer + std::copy(vInput.begin(), vInput.end(), back_inserter(answerSerial)); +} + +// Functor for checking whether bit is 0 or 1 +BOLT_FUNCTOR(find_zeros, +struct find_zeros +{ + unsigned iMask; + + find_zeros (unsigned iter) { iMask = 1 << iter;}; + + bool operator()(const unsigned &x) const {return (x & iMask) == 0;} +}; +); + +BOLT_FUNCTOR(find_ones, +struct find_ones +{ + unsigned iMask; + + find_ones (unsigned iter) { iMask = 1 << iter;}; + + bool operator()(const unsigned &x) const {return (x & iMask) != 0;} +}; +); + +// Bolt version of Radix Sort +void BoltRadixSort (const std::vector &input, size_t length, std::vector &answerBolt) +{ + bolt::cl::device_vector dvInput( input.begin(), input.end()); + bolt::cl::device_vector dvBuffer(length); + bolt::cl::device_vector dvMaskedVals(length); + bolt::cl::device_vector dvOffsetVals(length); + + for (int iter = 0; iter < BITS_IN_UNSIGEND; iter += 2) + { + // iter + // Separate 0s first + bolt::cl::transform(dvInput.begin(), dvInput.end(), dvMaskedVals.begin(), find_zeros(iter)); // Find elements with 0's, in parallel + bolt::cl::exclusive_scan(dvMaskedVals.begin(), dvMaskedVals.end(), dvOffsetVals.begin()); // Figure out where in the buffer to copy to, in parallel + bolt::cl::scatter_if(dvInput.begin(), dvInput.end(), dvOffsetVals.begin(), dvMaskedVals.begin(), dvBuffer.begin()); // Copy all the values to the buffer in parallel + + // And then, separate 1s + int count = bolt::cl::count(dvMaskedVals.begin(), dvMaskedVals.end(), 1); // Count how many elements with 0's we already processed + bolt::cl::transform(dvInput.begin(), dvInput.end(), dvMaskedVals.begin(), find_ones(iter)); // Find elements with 1's, in parallel + bolt::cl::exclusive_scan(dvMaskedVals.begin(), dvMaskedVals.end(), dvOffsetVals.begin(), count); // Figure out where in the buffer to copy to, in parallel + bolt::cl::scatter_if(dvInput.begin(), dvInput.end(), dvOffsetVals.begin(), dvMaskedVals.begin(), dvBuffer.begin()); // Copy all the values to the buffer in parallel + + // iter + 1 + // In order to avoid unnecessary copy operation, perfrom (iter+1)'th iteration in the same loop + // Separate 0s first + bolt::cl::transform(dvBuffer.begin(), dvBuffer.end(), dvMaskedVals.begin(), find_zeros(iter+1)); // Find elements with 0's first, in parallel + bolt::cl::exclusive_scan(dvMaskedVals.begin(), dvMaskedVals.end(), dvOffsetVals.begin()); // Figure out where in the buffer to copy to, in parallel + bolt::cl::scatter_if(dvBuffer.begin(), dvBuffer.end(), dvOffsetVals.begin(), dvMaskedVals.begin(), dvInput.begin()); // Copy all the values to the buffer in parallel + + // And then, separate 1s + count = bolt::cl::count(dvMaskedVals.begin(), dvMaskedVals.end(), 1); // Count how many elements with 0's we already processed + bolt::cl::transform(dvBuffer.begin(), dvBuffer.end(), dvMaskedVals.begin(), find_ones(iter+1)); // Find elements with 1's, in parallel + bolt::cl::exclusive_scan(dvMaskedVals.begin(), dvMaskedVals.end(), dvOffsetVals.begin(), count); // Figure out where in the buffer to copy to, in parallel + bolt::cl::scatter_if(dvBuffer.begin(), dvBuffer.end(), dvOffsetVals.begin(), dvMaskedVals.begin(), dvInput.begin()); // Copy all the values to the buffer in parallel + } + + // Copy the answer + bolt::cl::device_vector::pointer pData = dvInput.data(); + std::copy(&pData[0], &pData[length], back_inserter(answerBolt)); +} + +int main() +{ + std::cout << "\nRadix Sort EXAMPLE \n"; + + srand (time(NULL)); + + // Prepare 2^10 elements of random unsigned numbers to be sorted + size_t length = 1024*1024; + std::vector input(length); + std::generate(input.begin(), input.end(), rand); + + std::vector answerBolt; + std::vector answerSerial; + + // Serial version of Radix Sort + std::cout << "\nSorting STL vector of " << length << " unsigned integer elements using Serial Radix sort.\n"; + SerialRadixSort (input, length, answerSerial); + CheckAscending (answerSerial, length); + + // Bolt version of Radix Sort + std::cout << "\nSorting STL vector of " << length << " unsigned integer elements using Bolt Radix sort.\n"; + BoltRadixSort (input, length, answerBolt); + CheckAscending (answerBolt, length); + + // Verify that answerSerial matches answerBolt + std::cout << "\nComparing output of Serial Radix sort and Bolt Radix sort.\n"; + size_t i=0; + for (i=0; i devices; - bolt::cl::V_OPENCL( platforms.front( ).getDevices( CL_DEVICE_TYPE_ALL, &devices ),"Platform::getDevices() failed"); + bolt::cl::V_OPENCL( (*i).getDevices( CL_DEVICE_TYPE_ALL, &devices ),"Platform::getDevices() failed"); cl::Context myContext( devices.at( userDevice ) ); cl::CommandQueue myQueue( myContext, devices.at( userDevice ) );