Merge pull request #1 from LeelaChessZero/master

synching
hans-ekbrand · Feb 23, 2021 · 1d0ac79 · 1d0ac79
2 parents bd81641 + 7fe47d3
commit 1d0ac79
Show file tree

Hide file tree

Showing 38 changed files with 2,035 additions and 603 deletions.
diff --git a/.gitignore b/.gitignore
@@ -2,6 +2,7 @@
 *.swp
 .clang_complete
 .DS_Store
+.cache/
 .clangd/
 build/
 __pycache__/

diff --git a/README.md b/README.md
@@ -166,12 +166,11 @@ Or.
  * Install Xcode command-line tools: ``xcode-select --install``
  * Install "Xcode Developer Tools" through the App Store. (It is the first result in the list of apps when searched.)
  * Associate the SDK headers in Xcode with this command: export CPATH=\`xcrun --show-sdk-path\`/usr/include
-
+ 
 Now download the lc0 source, if you haven't already done so, following the instructions earlier in the page.
 
 6. Go to the lc0 directory.
-7. Run `./build.sh` (needs step 5)
-8. The resulting binary will be in build/release
+7. Run `./build.sh -Dgtest=false` (needs step 5)
 
 ### Raspberry Pi
 

diff --git a/appveyor.yml b/appveyor.yml
@@ -61,7 +61,7 @@ install:
 - cmd: IF DEFINED CUDA_INSTALL cuda_11.1.0_win10_network.exe -s nvcc_11.1 cublas_dev_11.1 cublas_11.1 cudart_11.1 documentation_11.1
 - cmd: IF %CUDA%==true set PATH=%CUDA_PATH%\bin;%PATH%
 - cmd: set PATH=C:\Python36;C:\Python36\scripts;%PATH%
-- cmd: pip3 install --upgrade meson
+- cmd: pip3 install --upgrade meson==0.55.3
 - cmd: call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64
 - cmd: IF %NAME%==android IF NOT EXIST C:\ndk\android-ndk-r19c\toolchains\llvm\prebuilt\windows-x86_64 appveyor DownloadFile https://dl.google.com/android/repository/android-ndk-r19c-windows-x86_64.zip
 - cmd: IF %NAME%==android IF NOT EXIST C:\ndk\android-ndk-r19c\toolchains\llvm\prebuilt\windows-x86_64 7z x android-ndk-r19c-windows-x86_64.zip -oC:\ndk
@@ -101,7 +101,7 @@ before_build:
 - cmd: IF %NAME%==cpu-dnnl SET EXTRA=-Db_vscrt=md
 - cmd: IF %ANDROID%==false meson build --backend vs2017 --buildtype release -Dgtest=%GTEST% -Dopencl=%OPENCL% -Dblas=%BUILD_BLAS% -Ddnnl=true -Ddx=%DX% -Dcudnn=%CUDNN% -Dispc_native_only=false -Dpopcnt=false -Dcudnn_include="%CUDA_PATH%\include","%CUDA_PATH%\cuda\include" -Dcudnn_libdirs="%CUDA_PATH%\lib\x64","%CUDA_PATH%\cuda\lib\x64" -Dopenblas_include="%PKG_FOLDER%\OpenBLAS\dist64\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS\dist64\lib" -Ddnnl_dir="%PKG_FOLDER%\dnnl_win_1.5.0_cpu_vcomp" -Dopencl_include="%PKG_FOLDER%\opencl-nug.0.777.77\build\native\include" -Dopencl_libdirs="%PKG_FOLDER%\opencl-nug.0.777.77\build\native\lib\x64" -Ddefault_library=static %EXTRA%
 - cmd: IF %ANDROID%==true meson arm64-v8a --buildtype release -Dgtest=false -Dopenblas_include="%PKG_FOLDER%\OpenBLAS\android-aarch64\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS\android-aarch64\lib" -Dembed=%EMBED% -Ddefault_library=static --cross-file crossfile-aarch64
-- cmd: IF %ANDROID%==true meson armeabi-v7a --buildtype release -Dgtest=false -Dopenblas_include="%PKG_FOLDER%\OpenBLAS\android-armv7a\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS\android-armv7a\lib" -Dembed=%EMBED% -Ddefault_library=static --cross-file crossfile-armv7a -Dispc=false
+- cmd: IF %ANDROID%==true meson armeabi-v7a --buildtype release -Dgtest=false -Dopenblas_include="%PKG_FOLDER%\OpenBLAS\android-armv7a\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS\android-armv7a\lib" -Dembed=%EMBED% -Ddefault_library=static --cross-file crossfile-armv7a -Dispc=false -Dneon=false
 build_script:
 - cmd: IF %ANDROID%==false call scripts\appveyor_win_build.cmd
 - cmd: IF %ANDROID%==true call scripts\appveyor_android_build.cmd

diff --git a/build.cmd b/build.cmd
@@ -3,6 +3,7 @@ setlocal
 
 rem 1. Set the following for the options you want to build.
 set CUDNN=true
+set CUDA=true
 set DX12=false
 set OPENCL=false
 set MKL=false
@@ -13,7 +14,7 @@ set TEST=false
 
 rem 2. Edit the paths for the build dependencies.
 set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0
-set CUDNN_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0
+set CUDNN_PATH=%CUDA_PATH%
 set OPENBLAS_PATH=C:\OpenBLAS
 set MKL_PATH=C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl
 set DNNL_PATH=C:\dnnl_win_1.1.1_cpu_vcomp
@@ -24,6 +25,11 @@ rem 3. In most cases you won't need to change anything further down.
 echo Deleting build directory:
 rd /s build
 
+set CC=cl
+set CXX=cl
+set CC_LD=link
+set CXX_LD=link
+
 if exist "C:\Program Files (x86)\Microsoft Visual Studio\2019" (
   where /q cl
   if errorlevel 1 call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64
@@ -34,6 +40,9 @@ if exist "C:\Program Files (x86)\Microsoft Visual Studio\2019" (
   set backend=vs2017
 )
 
+set BLAS=true
+if %MKL%==false if %DNNL%==false if %OPENBLAS%==false if %EIGEN%==false set BLAS=false
+
 if "%CUDA_PATH%"=="%CUDNN_PATH%" (
   set CUDNN_LIB_PATH=%CUDNN_PATH%\lib\x64
   set CUDNN_INCLUDE_PATH=%CUDNN_PATH%\include
@@ -44,8 +53,8 @@ if "%CUDA_PATH%"=="%CUDNN_PATH%" (
 
 if %CUDNN%==true set PATH=%CUDA_PATH%\bin;%PATH%
 
-meson build --backend %backend% --buildtype release -Ddx=%DX12% -Dcudnn=%CUDNN% -Dopencl=%OPENCL% ^
--Dblas=true -Dmkl=%MKL% -Dopenblas=%OPENBLAS% -Deigen=%EIGEN% -Ddnnl=%DNNL% -Dgtest=%TEST% ^
+meson build --backend %backend% --buildtype release -Ddx=%DX12% -Dcudnn=%CUDNN% -Dplain_cuda=%CUDA% ^
+-Dopencl=%OPENCL% -Dblas=%BLAS% -Dmkl=%MKL% -Dopenblas=%OPENBLAS% -Ddnnl=%DNNL% -Dgtest=%TEST% ^
 -Dcudnn_include="%CUDNN_INCLUDE_PATH%" -Dcudnn_libdirs="%CUDNN_LIB_PATH%" ^
 -Dmkl_include="%MKL_PATH%\include" -Dmkl_libdirs="%MKL_PATH%\lib\intel64" -Ddnnl_dir="%DNNL_PATH%" ^
 -Dopencl_libdirs="%OPENCL_LIB_PATH%" -Dopencl_include="%OPENCL_INCLUDE_PATH%" ^

diff --git a/changelog.txt b/changelog.txt
@@ -1,4 +1,58 @@
-v0.26.0-rc1 (2020-06-29)
+v0.27.0-rc0 (2021-02-06)
+~~~~~~~
+* Multigather search inspired by Ceres.
+* V6 training format with additional info for training experiments.
+* Updated default search parameters.
+* A better algorithm for the backendbench assistant.
+* Terminate search early if only 1 move isn't a proven loss.
+* Various build system changes.
+
+v0.26.3 (2020-10-10)
+~~~~~~~
+* Increased maximum value of TempDecayMoves.
+
+v0.26.3-rc2 (2020-10-03)
+~~~~~~~
+* Fix for uninitialized variable that led to crashes with the cudnn backend.
+* Correct Windows support for systems with more than 64 threads.
+* A new package is built for the `cuda` backend with cuda 11.1. The old cuda
+  package is renamed to `cudnn`.
+
+v0.26.3-rc1 (2020-09-28)
+~~~~~~~
+* Residual block fusion optimization for the cudnn backend, which depends on
+  `custom_winograd=true`. Enabled by default only for networks with up to 384
+  filters in fp16 mode and never in fp32 mode. Default can be overridden with
+  `--backend-opts=res_block_fusing=false` to disable (or `=true` to enable).
+* New experimental cuda backend without cudnn dependency (`cuda-auto`, `cuda`
+  and `cuda-fp16` are available).
+
+v0.26.2 (2020-08-31)
+~~~~~~~
+* No changes from rc1.
+
+v0.26.2-rc1 (2020-08-28)
+~~~~~~~~~~~
+* Repetitions in the search tree are marked as draws, to explore more promising
+  lines. Enabled by default (except in selfplay mode); use
+  `--two-fold-draws=false` to disable.
+* Syzygy tablebase files can now be used in selfplay. Still need to add
+  adjudication support before we can consider using this for training.
+* Default net updated to 703810.
+* Fix for book with CR/LF line endings.
+* Updated Eigen wrap to use new download link.
+
+v0.26.1 (2020-07-15)
+~~~~~~~
+* Fix a bug where invalid openings-pgn settings would result in the book
+  being ignored rather than used.
+* Add support for compressed book files.
+
+v0.26.0 (2020-07-03)
+~~~~~~~
+* No changes from rc1.
+
+v0.26.0-rc1 (2020-06-29)
 ~~~~~~~~~~~
 
 * Verbose move stats now includes a line for the root node itself.

diff --git a/meson.build b/meson.build
@@ -48,6 +48,12 @@ endif
 if host_machine.system() == 'windows'
   add_project_arguments('-DNOMINMAX', language : 'cpp')
 endif
+if host_machine.cpu_family() == 'arm'
+  if get_option('neon')
+    add_project_arguments(cc.get_supported_arguments(['-mfpu=neon']), language : 'cpp')
+    add_project_link_arguments(cc.get_supported_arguments(['-mfpu=neon']), language : 'cpp')
+  endif
+endif
 
 # Files to compile.
 deps = []
@@ -190,8 +196,8 @@ if get_option('build_backends')
   ## Tensorflow
   ## ~~~~~~~~~~
   tf_dl_lib = cc.find_library('dl', required: false)
-  tf_tensorflow_cc_lib = dependency('tensorflow_cc',
-                                    required: false, include_type:'system')
+  # We had `include_type: 'system'` to reduce warnings, but it breaks with meson > 0.56.0.
+  tf_tensorflow_cc_lib = dependency('tensorflow_cc', required: false)
   if get_option('tensorflow') and tf_dl_lib.found() and tf_tensorflow_cc_lib.found()
     deps += [tf_dl_lib, tf_tensorflow_cc_lib]
     files += 'src/neural/network_tf_cc.cc'
@@ -224,7 +230,10 @@ if get_option('build_backends')
   if get_option('blas')
     if get_option('mkl') and mkl_lib.found()
       add_project_arguments(['-DUSE_MKL', '-DUSE_BLAS'], language : 'cpp')
-      includes += include_directories(get_option('mkl_include'))
+      mkl_inc = get_option('mkl_include')
+      if run_command('scripts/checkdir.py', mkl_inc).returncode() == 0
+        includes += include_directories(mkl_inc)
+      endif
       deps += [ mkl_lib ]
 
     elif get_option('dnnl') and dnnl_lib.found()
@@ -278,7 +287,7 @@ if get_option('build_backends')
         ispc_extra_args += ['--pic']
         outputnames = [ '@[email protected]']
         if not ispc_native_only
-          outputnames += ['@BASENAME@_sse2.o', '@BASENAME@_sse4.o', 
+          outputnames += ['@BASENAME@_sse2.o', '@BASENAME@_sse4.o',
                           '@BASENAME@_avx.o', '@BASENAME@_avx2.o',
                           '@BASENAME@_avx512knl.o', '@BASENAME@_avx512skx.o' ]
         endif
@@ -441,6 +450,11 @@ if get_option('build_backends')
     if get_option('nvcc_ccbin') != ''
       cuda_arguments += ['-ccbin=' + get_option('nvcc_ccbin')]
     endif
+    cuda_cc = get_option('cc_cuda') # Unfortunately option cuda_cc is reserved.
+    nvcc_extra_args = []
+    if cuda_cc != ''
+      nvcc_extra_args = ['-arch=compute_' + cuda_cc, '-code=sm_' + cuda_cc]
+    endif
     foreach x : get_option('cudnn_include')
       cuda_arguments += ['-I', x]
     endforeach
@@ -454,27 +468,42 @@ if get_option('build_backends')
         arguments: cuda_arguments,
       )
 	 files += cuda_files
-    files += cuda_gen.process(cuda_files_nvcc_common)
-    nvcc_extra_args = ['-arch=compute_53']
-    nvcc_help = run_command(nvcc, '-h').stdout()
-    foreach x : ['sm_80', 'sm_75', 'sm_86', 'sm_70', 'sm_60' , 'sm_72', 'sm_62', 'sm_53']
-      if nvcc_help.contains(x)
-        nvcc_extra_args += '-code=' + x
+    files += cuda_gen.process(cuda_files_nvcc_common, extra_args: nvcc_extra_args)
+    nvcc_arch = '-arch=compute_70'
+    nvcc_sm_list = ['sm_80', 'sm_75', 'sm_86', 'sm_70']
+    if host_machine.system() != 'windows'
+      nvcc_arch = '-arch=compute_60'
+      nvcc_sm_list += ['sm_60']
+      if ['arm', 'aarch64'].contains(host_machine.cpu_family())
+        # Add Jetson versions to the list.
+        message('Jetson support enabled.')
+        nvcc_arch = '-arch=compute_53'
+        nvcc_sm_list += ['sm_72', 'sm_62', 'sm_53']
       endif
-    endforeach
+    endif
+    # Ignore the given CC for fp16 when it is not in the supported list.
+    if cuda_cc == '' or not nvcc_sm_list.contains('sm_' + cuda_cc)
+      nvcc_extra_args = [nvcc_arch]
+      nvcc_help = run_command(nvcc, '-h').stdout()
+      foreach x : nvcc_sm_list
+        if nvcc_help.contains(x)
+          nvcc_extra_args += '-code=' + x
+        endif
+      endforeach
+    endif
     files += cuda_gen.process(cuda_files_nvcc_fp16, extra_args: nvcc_extra_args)
     has_backends = true
   endif
-  
+
   ## ~~~~~~~~
   ## DirectX
   ## ~~~~~~~~
-  
-  # we should always be able to build DirectX12 backend on windows platform  
+
+  # we should always be able to build DirectX12 backend on windows platform
   if host_machine.system() == 'windows' and get_option('dx')
     dx_d3d12 = cc.find_library('d3d12')
     dx_dxgi = cc.find_library('dxgi')
-  
+
     dx_files = [
       'src/neural/dx/network_dx.cc',
       'src/neural/dx/shader_wrapper.cc',
@@ -487,8 +516,7 @@ if get_option('build_backends')
     subdir('src/neural/dx/shaders')
 
     has_backends = true
-  endif  
-
+  endif
 
 endif # if get_option('build_backends')
 
@@ -525,6 +553,11 @@ endif
       dirs: ['/usr/local/lib'], required: false)
   endif
 
+  deps += cc.find_library('libatomic', required: false)
+
+  if get_option('malloc') != ''
+    deps += cc.find_library(get_option('malloc'), required: true)
+  endif
 
 #############################################################################
 ## Main Executable
@@ -578,12 +611,11 @@ if get_option('gtest')
     include_directories: includes, link_with: lc0_lib, dependencies: gtest
   ), args: '--gtest_output=xml:syzygy.xml', timeout: 90)
 
-  test('EncodePositionForNN', 
+  test('EncodePositionForNN',
     executable('encoder_test', 'src/neural/encoder_test.cc', pb_files,
     include_directories: includes, link_with: lc0_lib,
     dependencies: [gtest]
   ), args: '--gtest_output=xml:encoder.xml', timeout: 90)
-
 endif
 
 
@@ -596,7 +628,7 @@ if get_option('python_bindings')
   python = pymod.find_installation('python3')
   if python.language_version() < '3.7'
     error('You need python 3.7 or newer')
-  endif  
+  endif
   py_bindings_generator = find_program('scripts/gen_py_bindings.py')
 
   gen_py_bindings = custom_target('backends', input:[], output:['backends.cc'],

diff --git a/meson_options.txt b/meson_options.txt
@@ -38,7 +38,7 @@ option('dnnl_dir',
        value: '',
        description: 'Paths to DNNL install directory')
 
-option('cudnn_include', 
+option('cudnn_include',
        type: 'array',
        value: ['/opt/cuda/include/', '/usr/local/cuda/include/', '/usr/lib/cuda/include/'],
        description: 'Paths to cudnn include directory')
@@ -108,6 +108,11 @@ option('accelerate',
        value: true,
        description: 'Enable Accelerate BLAS support')
 
+option('malloc',
+       type : 'string',
+       value: '',
+       description: 'Use alternative memory allocator, e.g. tcmalloc/jemalloc')
+
 option('popcnt',
        type: 'boolean',
        value: true,
@@ -118,6 +123,11 @@ option('pext',
        value: false,
        description: 'Use the pext instruction')
 
+option('neon',
+       type: 'boolean',
+       value: true,
+       description: 'Use neon instructions on arm processors')
+
 option('gtest',
        type: 'boolean',
        value: true,
@@ -137,3 +147,8 @@ option('python_bindings',
        type: 'boolean',
        value: false,
        description: 'Build Python bindings for the python to bind.')
+
+option('cc_cuda',
+       type: 'string',
+       value: '',
+       description: 'Build for a specific cuda CC, e.g. -Dcc_cuda=35 for CC 3.5')
diff --git a/scripts/appveyor_win_build.cmd b/scripts/appveyor_win_build.cmd
@@ -10,8 +10,9 @@ IF %NAME%==cpu-dnnl copy C:\cache\dnnl_win_1.5.0_cpu_vcomp\bin\dnnl.dll
 IF %PGO%==true (
   IF %OPENCL%==true copy C:\cache\opencl-nug.0.777.77\build\native\bin\OpenCL.dll
   IF %CUDA%==true copy "%CUDA_PATH%"\bin\*.dll
-  IF %CUDNN%==true copy %CUDA_PATH%\cuda\bin\cudnn64_7.dll
+  IF %CUDNN%==true copy "%CUDA_PATH%"\cuda\bin\cudnn64_7.dll
   lc0 benchmark --num-positions=1 --weights=c:\cache\%NET%.pb.gz --backend=random --movetime=10000
+  lc0 benchmark --num-positions=1 --weights=c:\cache\%NET%.pb.gz --backend=random --movetime=10000 --multi-gather=true
 )
 cd ..
 IF %PGO%==true msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=PGOptimize /p:DebugInformationFormat=ProgramDatabase /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll"