Merge branch 'hdfs-fetch-and-build-just-in-time' into 'main'
Remove generic dependency on Hadoop. Just download and compile it as needed.

See merge request dl/hugectr/hugectr!1545
EmmaQiaoCh committed Jul 23, 2024
2 parents 676c5a8 + 8564756 commit 130d936
Showing 11 changed files with 62 additions and 32 deletions.
2 changes: 1 addition & 1 deletion .gitlab-ci.yml
@@ -115,7 +115,7 @@ build_train_single_node_with_hdfs_minimal:
variables:
FROM_IMAGE: ${IMAGE_ALL}
DST_IMAGE: $TRAIN_IMAGE_VERSIONED_WITH_HDFS_MINI
CMAKE_OPTION: "-DCMAKE_BUILD_TYPE=Release -DKEY_HIT_RATIO=ON -DSM=\"60;61;70;75;80;90\" -DCLANGFORMAT=OFF -DENABLE_HDFS=MINIMAL"
CMAKE_OPTION: "-DCMAKE_BUILD_TYPE=Release -DKEY_HIT_RATIO=ON -DSM=\"60;61;70;75;80;90\" -DCLANGFORMAT=OFF -DENABLE_HDFS=ON"
BUILD_HUGECTR: 1
BUILD_HUGECTR2ONNX: 1

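The `MINIMAL` value no longer exists, so the CI job simply switches to `ON`. Assuming the job forwards `CMAKE_OPTION` to the configure step in the usual way (the actual build script is not part of this diff), the effect is roughly:

```shell
# Hypothetical use of the CI variable; the real invocation lives in the CI build scripts.
mkdir -p build && cd build
cmake ${CMAKE_OPTION} ..
make -j && make install
```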
6 changes: 0 additions & 6 deletions .gitmodules
@@ -32,12 +32,6 @@
[submodule "third_party/librdkafka"]
path = third_party/librdkafka
url = https://github.com/edenhill/librdkafka.git
[submodule "third_party/protobuf"]
path = third_party/protobuf
url = https://github.com/protocolbuffers/protobuf.git
[submodule "third_party/hadoop"]
path = third_party/hadoop
url = https://github.com/apache/hadoop.git
[submodule "third_party/HierarchicalKV"]
path = third_party/HierarchicalKV
url = https://github.com/NVIDIA-Merlin/HierarchicalKV.git
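With the `protobuf` and `hadoop` entries gone, existing clones may still carry stale submodule state. A cleanup sketch for such a checkout (paths taken from the removed entries; adjust to your local setup):

```shell
# Remove leftover working trees and cached submodule metadata after pulling this change.
rm -rf third_party/protobuf third_party/hadoop
rm -rf .git/modules/third_party/protobuf .git/modules/third_party/hadoop
git submodule sync --recursive
git submodule update --init --recursive
```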
57 changes: 49 additions & 8 deletions CMakeLists.txt
@@ -14,6 +14,7 @@
#

cmake_minimum_required(VERSION 3.17)

project(HugeCTR LANGUAGES CXX CUDA)

list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
@@ -351,17 +352,57 @@ add_subdirectory(gpu_cache/src)

option(ENABLE_HDFS "Enable HDFS" OFF)
if(ENABLE_HDFS)
if(ENABLE_HDFS STREQUAL "MINIMAL")
message("HDFS build mode: Client only")
else()
message("HDFS build mode: Full")
endif()
message(STATUS "HDFS build mode: Client only")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DENABLE_HDFS")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DENABLE_HDFS")

set(FETCHCONTENT_QUIET OFF)

# Java.
if (NOT EXISTS /usr/bin/mvn)
execute_process(WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
COMMAND /bin/bash ${PROJECT_SOURCE_DIR}/sbin/install-jdk-and-maven.sh
COMMAND_ERROR_IS_FATAL ANY
)
endif()

# Build and Install Hadoop
include(SetupHadoop)
hadoop_setup(${ENABLE_HDFS})
# Hadoop.
# sudo apt install libboost-date-time-dev
# sudo apt install libboost-program-options-dev
# sudo apt install libprotobuf-dev
# sudo apt install libfuse-dev
# sudo apt install libprotoc-dev
FetchContent_Declare(hadoop
DOWNLOAD_COMMAND git clone
--branch rel/release-3.4.0
--depth 1
--progress https://github.com/apache/hadoop.git
"${CMAKE_BINARY_DIR}/_deps/hadoop-src"
)
FetchContent_Populate(hadoop)
set(hadoop_SOURCE_DIR "${hadoop_SOURCE_DIR}/hadoop-hdfs-project/hadoop-hdfs-native-client")
set(hadoop_BINARY_DIR "${hadoop_SOURCE_DIR}/target/hadoop-hdfs-native-client-3.4.0")
if(EXISTS ${hadoop_BINARY_DIR}/include/hdfs.h AND EXISTS ${hadoop_BINARY_DIR}/lib/native/libhdfs.a)
message(STATUS "Found hdfs library in ${hadoop_BINARY_DIR}")
else()
execute_process(WORKING_DIRECTORY "${hadoop_SOURCE_DIR}"
COMMAND mvn clean package
-Pdist,native
-DskipTests
-Dtar
-Dmaven.javadoc.skip=true
-Drequire.snappy
-Drequire.zstd
-Drequire.openssl
-Drequire.pmdk
COMMAND_ERROR_IS_FATAL ANY
)
endif()
set(FETCHCONTENT_QUIET ON)

include_directories("${hadoop_BINARY_DIR}/include")
link_directories("${hadoop_BINARY_DIR}/lib/native")

set(ENABLE_HDFS ON)
endif()

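The new logic fetches Hadoop 3.4.0 at configure time and, unless `hdfs.h` and `libhdfs.a` are already present under the native-client target directory, builds only the HDFS native client with Maven. The manual equivalent of what `FetchContent` and `execute_process` do here is roughly (a sketch mirroring the commands above):

```shell
# Clone only the release tag that the CMake code pins.
git clone --branch rel/release-3.4.0 --depth 1 https://github.com/apache/hadoop.git
cd hadoop/hadoop-hdfs-project/hadoop-hdfs-native-client

# Build the native HDFS client; artifacts land under target/hadoop-hdfs-native-client-3.4.0/.
mvn clean package -Pdist,native -DskipTests -Dtar \
    -Dmaven.javadoc.skip=true \
    -Drequire.snappy -Drequire.zstd -Drequire.openssl -Drequire.pmdk
```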
2 changes: 1 addition & 1 deletion HugeCTR/src/CMakeLists.txt
@@ -67,7 +67,7 @@ target_link_libraries(huge_ctr_shared PRIVATE nlohmann_json::nlohmann_json)
target_link_libraries(huge_ctr_shared PUBLIC gpu_cache)

if(ENABLE_HDFS)
target_link_libraries(huge_ctr_shared PUBLIC ${DB_LIB_PATHS}/libhdfs.so)
target_link_libraries(huge_ctr_shared PUBLIC hdfs)
endif()

if(ENABLE_S3)
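Linking plain `hdfs` relies on the `link_directories("${hadoop_BINARY_DIR}/lib/native")` call added in the top-level CMakeLists.txt instead of the old absolute `${DB_LIB_PATHS}/libhdfs.so` path. A quick way to confirm the dependency was picked up after a build (the output path below is the usual default, not something this diff guarantees):

```shell
# Shows an hdfs entry if the shared library was linked dynamically;
# nothing appears when the static libhdfs.a was used instead.
ldd build/lib/libhuge_ctr_shared.so | grep -i hdfs
```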
6 changes: 1 addition & 5 deletions HugeCTR/src/hps/CMakeLists.txt
@@ -36,11 +36,7 @@ add_compile_definitions(LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE)
add_library(huge_ctr_hps SHARED ${huge_ctr_hps_src})

if(ENABLE_HDFS)
target_link_libraries(
huge_ctr_hps
PUBLIC
${DB_LIB_PATHS}/libhdfs.so # from Hugectr
)
target_link_libraries(huge_ctr_hps PUBLIC hdfs)
endif()

if(ENABLE_S3)
6 changes: 1 addition & 5 deletions HugeCTR/src/inference_benchmark/CMakeLists.txt
@@ -20,11 +20,7 @@ file(GLOB hps_benchmark_src
)

if(ENABLE_HDFS)
target_link_libraries(
huge_ctr_inference
PUBLIC
${DB_LIB_PATHS}/libhdfs.so # from Hugectr
)
target_link_libraries(huge_ctr_inference PUBLIC hdfs)
endif()

if(ENABLE_S3)
6 changes: 3 additions & 3 deletions docs/source/hugectr_contributor_guide.md
@@ -104,10 +104,10 @@ To build HugeCTR Training Container from source, do the following:
- **ENABLE_INFERENCE**: You can use this option to build HugeCTR in inference mode, which was designed for the inference framework. In this mode, an inference shared library
will be built for the HugeCTR Backend. Only interfaces that support the HugeCTR Backend can be used. Therefore, you can’t train models in this mode. This option is set to
OFF by default. For building inference container, please refer to [Build HugeCTR Inference Container from Source](#build-hugectr-inference-container-from-source)
- **ENABLE_HDFS**: You can use this option to build HugeCTR together with HDFS to enable HDFS related functions. Permissible values are `ON`, `MINIMAL` and `OFF` *(default)*. Setting this option to `ON` leads to building all necessary Hadoop modules that are required for building AND running both HugeCTR and HDFS. In contrast, `MINIMAL` restricts building only the minimum necessary set of components for building HugeCTR.
- **ENABLE_HDFS**: You can use this option to build HugeCTR together with HDFS to enable HDFS-related functions. Permissible values are `ON` and `OFF` *(default)*. Setting this option to `ON` builds all necessary Hadoop modules during the HugeCTR build so that HugeCTR can connect to HDFS deployments.
- **ENABLE_S3**: You can use this option to build HugeCTR together with the Amazon AWS S3 SDK to enable S3-related functions. Permissible values are `ON` and `OFF` *(default)*. Setting this option to `ON` leads to building all necessary AWS SDKs and dependencies that are required for building AND running both HugeCTR and S3.

**Please note that setting DENABLE_HDFS=ON/MINIMAL or DENABLE_S3=ON requires root permission. So before using these two options to do the customized building, make sure you use `-u root` when you run the docker container.**
**Please note that setting -DENABLE_HDFS=ON or -DENABLE_S3=ON requires root permission. So before using these two options for a customized build, make sure you use `-u root` when you run the docker container.**
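As an illustration of that note, the container could be started like this (image name and mount paths are placeholders, not part of the documentation):

```shell
# Run the dev container as root so -DENABLE_HDFS=ON / -DENABLE_S3=ON can install what they need.
docker run --rm -it -u root --gpus=all -v $(pwd):/hugectr -w /hugectr <your-hugectr-devel-image> bash
```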

Here are some examples of how you can build HugeCTR using these build options:
```shell
@@ -124,7 +124,7 @@

```shell
$ mkdir -p build && cd build
$ cmake -DCMAKE_BUILD_TYPE=Release -DSM="70;80" -DENABLE_HDFS=MINIMAL .. # Target is NVIDIA V100 / A100 with only minimum HDFS components mode on.
$ cmake -DCMAKE_BUILD_TYPE=Release -DSM="70;80" -DENABLE_HDFS=ON .. # Target is NVIDIA V100 / A100 with HDFS support enabled.
$ make -j && make install
```

2 changes: 1 addition & 1 deletion sbin/install-hadoop.sh
@@ -40,7 +40,7 @@ if [[ ! -f "${HADOOP_HOME}/include/hdfs.h" ]]; then
cp hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/include/hdfs/hdfs.h ${HADOOP_HOME}/include
fi

# Cleanup reundant files.
# Cleanup redundant files.
for f in $(find ${HADOOP_HOME} -name *.cmd); do
rm -rf $f
done
1 change: 0 additions & 1 deletion third_party/hadoop
Submodule hadoop deleted from a585a7
1 change: 0 additions & 1 deletion third_party/protobuf
Submodule protobuf deleted from 22d0e2
5 changes: 5 additions & 0 deletions tools/dockerfiles/Dockerfile.optimized
@@ -26,6 +26,11 @@ ARG RELEASE=true

RUN apt-get update -y && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
libboost-date-time-dev \
libboost-program-options-dev \
libprotobuf-dev \
libprotoc-dev \
libfuse-dev \
clang-format \
libtbb-dev \
libaio-dev && \
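These packages match the prerequisites listed as comments next to the Hadoop `FetchContent` block in CMakeLists.txt. For a build outside this container, roughly the same set would need to be installed first (Debian/Ubuntu package names; other distributions differ):

```shell
sudo apt-get update
sudo apt-get install -y --no-install-recommends \
    libboost-date-time-dev libboost-program-options-dev \
    libprotobuf-dev libprotoc-dev libfuse-dev
```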
