[refactor] Make llm.c modular
Signed-off-by: Jun Zhang <[email protected]>
junaire committed Aug 1, 2024
1 parent bdb0fb5 commit 555e9c2
Showing 5 changed files with 1,329 additions and 1,022 deletions.
65 changes: 65 additions & 0 deletions .github/workflows/cmake.yml
@@ -0,0 +1,65 @@
name: CMake
on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]
jobs:
  build-Clang-Linux:
    strategy:
      matrix:
        target:
          - 'Debug'
          - 'Release'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Install clang
        run: |
          sudo apt update && sudo apt install build-essential software-properties-common clang
      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
      - name: Build
        run: |
          echo "/usr/lib/ccache:/usr/local/opt/ccache/libexec" >> $GITHUB_PATH
          mkdir build
          cd build
          cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=${{ matrix.target }} ..
          cmake --build . -j$(nproc)
          ctest . -j$(nproc)
  build-GCC-Linux:
    strategy:
      matrix:
        target:
          - 'Debug'
          - 'Release'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Install GCC
        run: |
          sudo apt update && sudo apt install build-essential software-properties-common
      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
      - name: Build
        run: |
          echo "/usr/lib/ccache:/usr/local/opt/ccache/libexec" >> $GITHUB_PATH
          mkdir build
          cd build
          cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=${{ matrix.target }} ..
          cmake --build . -j$(nproc)
  build-windows:
    strategy:
      matrix:
        target:
          - 'Debug'
          - 'Release'
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v4
      - name: Build
        run: |
          mkdir build
          cd build
          cmake -DCMAKE_BUILD_TYPE=${{ matrix.target }} ..
          cmake --build . -j $Env:NUMBER_OF_PROCESSORS
92 changes: 92 additions & 0 deletions CMakeLists.txt
@@ -0,0 +1,92 @@
cmake_minimum_required(VERSION 3.24)
project(llm.c LANGUAGES C)
# project(llm.c LANGUAGES C CXX CUDA)

# Put binaries and libraries in the same location.
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)

# Always export compile_commands.json for lsp like clangd.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Release by default if not specified.
if (NOT EXISTS ${CMAKE_BINARY_DIR}/CMakeCache.txt)
  if (NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)
  endif()
endif()

# option(PRECISION "Precision settings" BF16)
# option(USE_CUDNN "Use cudnn" ON)

add_library(train_gpt2_cpu SHARED train_gpt2.c)
target_include_directories(train_gpt2_cpu PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/llmc)
target_link_libraries(train_gpt2_cpu PRIVATE m)
target_compile_definitions(train_gpt2_cpu PRIVATE -DLLMC_LIB=1)
if (NO_OMP)
  message(STATUS "OpenMP is manually disabled")
else()
  find_package(OpenMP)
  if (OpenMP_FOUND)
    message(STATUS "✓ OpenMP found")
    target_link_libraries(train_gpt2_cpu PRIVATE OpenMP::OpenMP_C)
  else()
    message(STATUS "✗ OpenMP not found")
  endif()
endif()
if (MSVC)
  target_include_directories(train_gpt2_cpu PUBLIC dev)
  target_compile_options(train_gpt2_cpu PRIVATE /Zi /nologo /W4 /WX- /diagnostics:column /sdl /O2 /Oi /Ot /GL /D _DEBUG /D _CONSOLE /D _UNICODE /D UNICODE /Gm- /EHsc /MD /GS /Gy /fp:fast /Zc:wchar_t /Zc:forScope /Zc:inline /permissive- /external:W3 /Gd /TP /wd4996 /FC /openmp:llvm)
else()
  target_compile_options(train_gpt2_cpu PRIVATE -Ofast -Wno-unused-result -Wno-ignored-pragmas -Wno-unknown-attributes -march=native)
endif()

# set_source_files_properties(llmc/cudnn_att.cpp PROPERTIES LANGUAGE CUDA)
# add_library(train_gpt2_cuda SHARED train_gpt2.cu llmc/cudnn_att.cpp)
# target_include_directories(train_gpt2_cuda PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/llmc)
# target_compile_options(train_gpt2_cuda PRIVATE -O3 -t=0 --use_fast_math)
# set_target_properties(train_gpt2_cuda PROPERTIES CXX_STANDARD 17)
# if (PRECISION EQUAL "FP32")
# target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_FP32)
# elseif(PRECISION EQUAL "FP16")
# target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_FP16)
# else()
# target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_BF16)
# endif()


# Disable cudnn for now, it has some bugs in its cmake.
# if (USE_CUDNN)
# include(FetchContent)
# FetchContent_Declare(cudnn-frontend URL https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.5.2.tar.gz)
# FetchContent_MakeAvailable(cudnn-frontend)
# target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_CUDNN)
# target_link_libraries(train_gpt2_cuda PRIVATE cudnn)
# endif()

# if (NO_USE_MPI)
# message(STATUS "→ MPI is manually disabled")
# else()
# find_package(MPI)
# if (MPI_FOUND)
# message(STATUS "✓ MPI found")
# target_compile_definitions(train_gpt2_cuda PRIVATE -DUSE_MPI)
# target_link_libraries(train_gpt2_cuda PRIVATE MPI::MPI_C)
# else()
# message(STATUS "✗ MPI not found")
# endif()
# endif()
#
# if (NO_MULTI_GPU)
# message(STATUS "→ Multi-GPU (NCCL) is manually disabled")
# else()
# find_package(NCCL)
# if (NCCL_FOUND)
# message(STATUS "✓ NCCL found, OK to train with multiple GPUs")
# target_compile_definitions(train_gpt2_cuda PRIVATE -DMULTI_GPU)
# target_link_libraries(train_gpt2_cuda PRIVATE NCCL::NCCL_C)
# else()
# message(STATUS "✗ NCCL is not found, disabling multi-GPU support")
# endif()
# endif()
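
Because train_gpt2_cpu is now built as a SHARED library with LLMC_LIB defined, the training code can be consumed from outside the repository through an opaque-pointer style API. A hypothetical consumer is sketched below; the forward declaration (the header actually defines DataLoader as an anonymous-struct typedef), the library name derived from the target, and the compile line in the comment are assumptions for illustration, not part of this commit.

/* consumer.c -- hypothetical standalone program using the new shared library.
 * Build sketch, assuming the output layout configured above:
 *   cc consumer.c -Lbuild/lib -ltrain_gpt2_cpu -lm -o consumer
 */
#include <stddef.h>

// Treat DataLoader as an opaque handle; the real definition lives in
// llmc/dataloader.h (an anonymous-struct typedef, so this tag is illustrative).
typedef struct DataLoader DataLoader;

// The two entry points added to llmc/dataloader.h in this commit.
DataLoader* dataloader_create(const char* filename_pattern, size_t B, size_t T,
                              int process_rank, int num_processes, int should_shuffle);
void dataloader_destroy(DataLoader* loader);

int main(void) {
    // Placeholder shard path and batch shape, for illustration only.
    DataLoader* loader = dataloader_create("dev/data/tinyshakespeare/tiny_shakespeare_val.bin",
                                           4, 64, 0, 1, 0);
    dataloader_destroy(loader);
    return 0;
}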
21 changes: 20 additions & 1 deletion llmc/dataloader.h
@@ -200,6 +200,19 @@ void dataloader_init(DataLoader *loader,
    dataloader_reset(loader);
}

DataLoader* dataloader_create(const char* filename_pattern,
                              size_t B,
                              size_t T,
                              int process_rank,
                              int num_processes,
                              int should_shuffle) {
    DataLoader* loader = (DataLoader*)mallocCheck(sizeof(DataLoader));
    dataloader_init(loader, filename_pattern, B, T, process_rank, num_processes, should_shuffle);
    return loader;
}

void dataloader_load_batch(DataLoader* loader) {
    assert(!loader->should_shuffle || (loader->should_shuffle && loader->intra_shard_indices != NULL));
    assert(loader->current_sample_idx < loader->shard_num_samples);
@@ -248,6 +261,12 @@ void dataloader_free(DataLoader *loader) {
    globfree(&loader->glob_result);
}

void dataloader_destroy(DataLoader* loader) {
    dataloader_free(loader);  // release the loader's internal buffers
    free(loader);             // then the heap-allocated struct itself
}

// ----------------------------------------------------------------------------
// Distributed Eval Loader
// Many evals (like HellaSwag and MMLU) are multiple-choice
@@ -511,4 +530,4 @@ void evalloader_free(EvalLoader *loader) {
    fcloseCheck(loader->eval_file);
}

#endif // DATALOADER_H
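
Taken together with the pre-existing batch API in this header, the create/destroy pair covers the loader's whole lifecycle. A minimal usage sketch (the shard path and the B=4, T=64 single-process settings are illustrative assumptions; dataloader_next_batch, inputs, and targets are the header's existing API):

#include <stdio.h>
#include "llmc/dataloader.h"

int main(void) {
    // Hypothetical shard: batches of B=4 sequences, T=64 tokens, rank 0 of 1, no shuffling.
    DataLoader* loader = dataloader_create("dev/data/tinyshakespeare/tiny_shakespeare_train.bin",
                                           4, 64, 0, 1, 0);
    dataloader_next_batch(loader);                     // fills loader->inputs / loader->targets
    printf("first input token id: %d\n", loader->inputs[0]);
    dataloader_destroy(loader);                        // dataloader_free + free of the struct
    return 0;
}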
12 changes: 12 additions & 0 deletions llmc/tokenizer.h
@@ -83,6 +83,12 @@ void tokenizer_init(Tokenizer *tokenizer, const char *filename) {
    tokenizer->init_ok = 1;
}

Tokenizer* tokenizer_create(const char *filename) {
    Tokenizer* tokenizer = (Tokenizer*)mallocCheck(sizeof(Tokenizer));
    tokenizer_init(tokenizer, filename);
    return tokenizer;
}

const char *tokenizer_decode(Tokenizer *tokenizer, uint32_t token_id) {
    if (tokenizer->init_ok == 0) {
        return NULL;
@@ -103,3 +109,9 @@ void tokenizer_free(Tokenizer *tokenizer) {
        free(tokenizer->token_table);
    }
}

void tokenizer_destroy(Tokenizer* tokenizer) {
    tokenizer_free(tokenizer);  // free the token table if it was initialized
    free(tokenizer);            // then the heap-allocated struct itself
}
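
The tokenizer gets the same treatment. A sketch of decoding a single token id through the new entry points (gpt2_tokenizer.bin is the file llm.c conventionally loads, assumed present here; 50256 is GPT-2's <|endoftext|> id):

#include <stdio.h>
#include "llmc/tokenizer.h"

int main(void) {
    Tokenizer* tok = tokenizer_create("gpt2_tokenizer.bin");  // assumed to exist on disk
    const char* piece = tokenizer_decode(tok, 50256);         // returns NULL if init failed
    if (piece != NULL) {
        printf("%s\n", piece);
    }
    tokenizer_destroy(tok);  // tokenizer_free + free of the struct
    return 0;
}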
