pytorch · mikekgfb · May 13, 2024 · May 12, 2024 · May 12, 2024
diff --git a/parking_lot/unsupported/README.md b/parking_lot/unsupported/README.md
@@ -0,0 +1,28 @@
+# Enabling Models from Server to Mobile
+
+THIS DIRECTORY AND ITS SUBDIRECTORIES CONTAIN AN UNSUPPORTED EXAMPLE.
+
+This directory is a minimal example for integrating PyTorch models,
+exported either with AOT Inductor as a shared library (also known as a
+dynamic shared object, DSO) or with ExecuTorch as a PTE model file,
+into a C/C++ app.
+
+The example is derived from Andrej Karpathy's llama2.c executor, as
+modified by Bert Maher for llama2.so, and distributed under Andrej's
+original license.
+
+Please refer to the documentation at
+https://github.com/karpathy/llama2.c (and Bert Maher's
+https://github.com/bertmaher/llama2.so for modifications to serve as
+execution environment for PyTorch models) for a discussion of
+downloading and preparing tokenizer models and invoking the model.
+
+This runner is limited to llama2-style models using the SentencePiece
+tokenizer to highlight the minimum example of how to enable an
+arbitrary application to call a PyTorch model in either a DSO or PTE
+format.  In addition to header files, these changes include
+maintaining a pointer to the AOT Inductor or ExecuTorch runtime
+executor, and the `forward()` function in runner/run.cpp, as well as
+CMake files in runner-aoti and runner-et to build the runner with the
+AOT Inductor and ExecuTorch runtimes, respectively.
+
diff --git a/parking_lot/unsupported/runner-aoti/CMakeLists.txt b/parking_lot/unsupported/runner-aoti/CMakeLists.txt
@@ -0,0 +1,11 @@
+cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
+project(llama2so LANGUAGES CXX)
+# Builds the AOT Inductor flavor of the runner from run.cpp in this directory.
+find_package(CUDA)  # optional: no REQUIRED, so configuration continues without CUDA
+
+find_package(Torch REQUIRED)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g ${TORCH_CXX_FLAGS} -fpermissive")
+# PRIVATE: run is an executable, so nothing consumes its link interface.
+add_executable(run run.cpp)
+target_link_libraries(run PRIVATE "${TORCH_LIBRARIES}" m)
+set_target_properties(run PROPERTIES CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON)
diff --git a/parking_lot/unsupported/runner-aoti/LICENSE b/parking_lot/unsupported/runner-aoti/LICENSE
@@ -0,0 +1,22 @@
+MIT License
+
+Copyright (c) 2023 Andrej Karpathy
+Copyright (c) 2024 Meta Platforms
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/parking_lot/unsupported/runner-aoti/run.cpp b/parking_lot/unsupported/runner-aoti/run.cpp
@@ -0,0 +1,6 @@
+/* Llama-2 Transformer inference: AOT Inductor (AOTI) flavor of the runner.  */
+/* The logic is shared with the ExecuTorch (ET) flavor via ../runner/run.cpp */
+/* but needs different data types (ATen vs ETen); see the define below.      */
+
+#define __AOTI__MODEL
+#include "../runner/run.cpp"
diff --git a/parking_lot/unsupported/runner-et/CMakeLists.txt b/parking_lot/unsupported/runner-et/CMakeLists.txt
@@ -0,0 +1,32 @@
+cmake_minimum_required(VERSION 3.24)  # 3.24+ provides the $<LINK_LIBRARY:...> genex used when linking runner_et
+set(CMAKE_CXX_STANDARD 17)
+# LLAMA_FAST_ROOT locates the ExecuTorch install tree and build tree used below.
+project(llama-fast)
+include(CMakePrintHelpers)
+set(LLAMA_FAST_ROOT $ENV{LLAMA_FAST_ROOT})
+if("${LLAMA_FAST_ROOT}" STREQUAL "")  # fail early instead of resolving paths from the filesystem root
+  message(FATAL_ERROR "LLAMA_FAST_ROOT is not set; point it at the directory that contains build/install and build/src")
+endif()
+cmake_print_variables(LLAMA_FAST_ROOT)
+find_package(executorch CONFIG REQUIRED PATHS ${LLAMA_FAST_ROOT}/build/install/lib/cmake/ExecuTorch)
+set(_common_include_directories ${LLAMA_FAST_ROOT}/build/src)
+cmake_print_variables(_common_include_directories)
+target_include_directories(executorch INTERFACE ${_common_include_directories}) # Ideally ET installation process would do this
+add_executable(runner_et run.cpp)
+
+# Link ET runtime + extensions
+target_link_libraries(runner_et PRIVATE
+        executorch
+        extension_module
+        ${LLAMA_FAST_ROOT}/build/src/executorch/cmake-out/extension/data_loader/libextension_data_loader.a # This one does not get installed by ET
+        optimized_kernels
+        portable_kernels
+        cpublas
+        eigen_blas
+)
+# Whole-archive linking keeps every object of these libraries; presumably needed for static registration -- TODO confirm.
+target_link_libraries(runner_et PRIVATE "$<LINK_LIBRARY:WHOLE_ARCHIVE,optimized_native_cpu_ops_lib>")
+target_link_libraries(runner_et PRIVATE "$<LINK_LIBRARY:WHOLE_ARCHIVE,xnnpack_backend>")
+target_link_libraries(runner_et PRIVATE "$<LINK_LIBRARY:WHOLE_ARCHIVE,XNNPACK>")
+target_link_libraries(runner_et PRIVATE "$<LINK_LIBRARY:WHOLE_ARCHIVE,pthreadpool>")
+target_link_libraries(runner_et PRIVATE "$<LINK_LIBRARY:WHOLE_ARCHIVE,cpuinfo>")
diff --git a/parking_lot/unsupported/runner-et/LICENSE b/parking_lot/unsupported/runner-et/LICENSE
@@ -0,0 +1,22 @@
+MIT License
+
+Copyright (c) 2023 Andrej Karpathy
+Copyright (c) 2024 Meta Platforms
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/parking_lot/unsupported/runner-et/run.cpp b/parking_lot/unsupported/runner-et/run.cpp
@@ -0,0 +1,6 @@
+/* Llama-2 Transformer inference: ExecuTorch (ET) flavor of the runner.        */
+/* The logic is shared with the AOT Inductor (AOTI) flavor via                 */
+/* ../runner/run.cpp but needs different data types (ETen vs ATen); see below. */
+
+#define __ET__MODEL
+#include "../runner/run.cpp"