Skip to content

Commit

Permalink
Patch CUDA training logic to enable wasmedge-llmc gpu backend
Browse files Browse the repository at this point in the history
Signed-off-by: Jun Zhang <[email protected]>
  • Loading branch information
junaire committed Sep 10, 2024
1 parent a6f9fe8 commit f66b549
Show file tree
Hide file tree
Showing 3 changed files with 495 additions and 55 deletions.
25 changes: 25 additions & 0 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,28 @@ jobs:
cd build
cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=${{ matrix.target }} ..
cmake --build . -j$(nproc)
# CI job: build the CUDA training backend (wasmedge-llmc GPU path) for both
# Debug and Release inside an NVIDIA CUDA 11.8 + cuDNN 8 devel container.
# NOTE(review): indentation here is flattened by the diff view; in the real
# workflow file these keys nest under `jobs:`.
build-CUDA:
strategy:
matrix:
target:
- 'Debug'
- 'Release'
runs-on: ubuntu-20.04
# Container image provides nvcc + cuDNN headers; no GPU is needed to compile.
container:
image: nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
steps:
- name: Checkout code
uses: actions/checkout@v4

# Print toolchain versions so CI failures are easier to diagnose.
- name: System Info
run: |
nvcc --version
g++ --version
# Configure with -DCUDALIB=1 to enable the train_gpt2_cuda target; git is
# installed first because the bare CUDA container ships without it (needed
# by FetchContent at configure time).
- name: Build
run: |
apt-get update && apt-get install -y git
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=${{ matrix.target }} .. -DCUDALIB=1
cmake --build . -j$(nproc)
115 changes: 61 additions & 54 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
cmake_minimum_required(VERSION 3.15)
project(llm.c LANGUAGES C)
# project(llm.c LANGUAGES C CXX CUDA)

# Put binaries and libraries in the same location.
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)
option(CUDALIB "Enable CUDA training" OFF)
# BUG FIX: option() only models booleans (ON/OFF); "BF16" is not a valid
# option default. Model the precision choice as a STRING cache variable with
# an enumerated value set (also gives a dropdown in cmake-gui/ccmake).
set(PRECISION "BF16" CACHE STRING "Precision settings (FP32|FP16|BF16)")
set_property(CACHE PRECISION PROPERTY STRINGS FP32 FP16 BF16)


# Always export compile_commands.json for lsp like clangd.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
Expand All @@ -15,16 +12,24 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
message(FATAL_ERROR "This compiler is not supported")
endif()

if (CUDALIB)
project(llm.c LANGUAGES C CXX CUDA)
else()
project(llm.c LANGUAGES C)
endif()

# Put binaries and libraries in the same location.
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)

# Release by default if not specified.
if (NOT EXISTS ${CMAKE_BINARY_DIR}/CMakeCache.txt)
if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)
endif()
endif()

# option(PRECISION "Precision settings" BF16)
# option(USE_CUDNN "Use cudnn" ON)

add_library(train_gpt2_cpu train_gpt2.c)
target_include_directories(train_gpt2_cpu PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/llmc)
target_link_libraries(train_gpt2_cpu PRIVATE m)
Expand All @@ -42,51 +47,53 @@ else()
endif()
target_compile_options(train_gpt2_cpu PRIVATE -Ofast -Wno-unused-result -Wno-ignored-pragmas -Wno-unknown-attributes -march=native)

# set_source_files_properties(llmc/cudnn_att.cpp PROPERTIES LANGUAGE CUDA)
# add_library(train_gpt2_cuda SHARED train_gpt2.cu llmc/cudnn_att.cpp)
# target_include_directories(train_gpt2_cuda PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/llmc)
# target_compile_options(train_gpt2_cuda PRIVATE -O3 -t=0 --use_fast_math)
# set_target_properties(train_gpt2_cuda PROPERTIES CXX_STANDARD 17)
# if (PRECISION EQUAL "FP32")
# target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_FP32)
# elseif(PRECISION EQUAL "FP16")
# target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_FP16)
# else()
# target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_BF16)
# endif()
# Training GPT2 with CUDA.
if (CUDALIB)
set_source_files_properties(llmc/cudnn_att.cpp PROPERTIES LANGUAGE CUDA)
add_library(train_gpt2_cuda train_gpt2.cu llmc/cudnn_att.cpp)
target_include_directories(train_gpt2_cuda PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/llmc)
target_compile_options(train_gpt2_cuda PRIVATE -O3 -t=0 --use_fast_math)
target_compile_definitions(train_gpt2_cuda PRIVATE -DLLMC_LIB=1)
set_target_properties(train_gpt2_cuda PROPERTIES CXX_STANDARD 17)
set_target_properties(train_gpt2_cuda PROPERTIES CUDA_ARCHITECTURES "72;80")

if (PRECISION EQUAL "FP32")
target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_FP32)
elseif(PRECISION EQUAL "FP16")
target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_FP16)
else()
target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_BF16)
endif()

set(CUDNN_FRONTEND_BUILD_SAMPLES OFF)
set(CUDNN_FRONTEND_BUILD_UNIT_TESTS OFF)
message(STATUS "Fetching cudnn-frontend")
include(FetchContent)
FetchContent_Declare(
cf
URL https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.6.1.tar.gz
URL_HASH MD5=c131914d8007318ec7b5b5f792458cb4
)
FetchContent_MakeAvailable(cf)
FetchContent_GetProperties(cf)
target_include_directories(train_gpt2_cuda PRIVATE ${cf_SOURCE_DIR}/include)
target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_CUDNN)
target_link_libraries(train_gpt2_cuda PRIVATE cudnn)

find_package(CUDAToolkit REQUIRED)
target_link_libraries(train_gpt2_cuda PRIVATE CUDA::cublas CUDA::cublasLt)

# Disable cudnn for now, it has soem bugs in its cmake.
# if (USE_CUDNN)
# include(FetchContent)
# FetchContent_Declare(cudnn-frontend URL https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.5.2.tar.gz)
# FetchContent_MakeAvailable(cudnn-frontend)
# target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_CUDNN)
# target_link_libraries(train_gpt2_cuda PRIVATE cudnn)
# endif()
if (NO_USE_MPI)
message(STATUS "→ MPI is manually disabled")
else()
find_package(MPI)
if (MPI_FOUND)
message(STATUS "✓ MPI found")
target_compile_definitions(train_gpt2_cuda PRIVATE -DUSE_MPI)
target_link_libraries(train_gpt2_cuda PRIVATE MPI::MPI_C)
else()
message(STATUS "✗ MPI not found")
endif()
endif()
endif()

# if (NO_USE_MPI)
# message(STATUS "→ MPI is manually disabled")
# else()
# find_package(MPI)
# if (MPI_FOUND)
# message(STATUS "✓ MPI found")
# target_compile_definitions(train_gpt2_cuda PRIVATE -DUSE_MPI)
# target_link_libraries(train_gpt2_cuda PRIVATE MPI::MPI_C)
# else()
# message(STATUS "✗ MPI not found")
# endif()
# endif()
#
# if (NO_MULTI_GPU)
# message(STATUS "→ Multi-GPU (NCCL) is manually disabled")
# else()
# find_package(NCCL)
# if (NCCL_FOUND)
# message(STATUS "✓ NCCL found, OK to train with multiple GPUs")
# target_compile_definitions(train_gpt2_cuda PRIVATE -DMULTI_GPU)
# target_link_libraries(train_gpt2_cuda PRIVATE NCCL::NCCL_C)
# else()
# message(STATUS "✗ NCCL is not found, disabling multi-GPU support")
# endif()
# endif()
Loading

0 comments on commit f66b549

Please sign in to comment.