From 3a423e69220565715e446b0a4a59a217dc69f3d9 Mon Sep 17 00:00:00 2001 From: Jeff Bolz Date: Fri, 27 Sep 2024 02:58:01 -0500 Subject: [PATCH] vulkan : fix build for GGML_VULKAN_RUN_TESTS, add TFLOPS to log (#961) --- AUTHORS | 90 ++++++++++++++++++++++++++++++++++++++++++++- src/ggml-vulkan.cpp | 19 +++++++--- 2 files changed, 102 insertions(+), 7 deletions(-) diff --git a/AUTHORS b/AUTHORS index 850e426fb..b5e2c81fa 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,16 +1,24 @@ -# date: Tue Apr 9 20:16:51 EEST 2024 +# date: Thu Sep 26 09:19:50 CDT 2024 # this file is auto-generated by scripts/gen-authors.sh 0cc4m +65a <10104049+65a@users.noreply.github.com> AT Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> Adam Tazi <52357206+ad1tazi@users.noreply.github.com> AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com> +AidanBeltonS +Akarshan Biswas +Albert Jin +Alberto Cabrera Pérez +Alberto Cabrera Pérez Alex Azarov +Alex O'Connell <35843486+acon96@users.noreply.github.com> Alex von Gluck IV AmbientL <107641468+AmbientL@users.noreply.github.com> AmirAli Mirian <37371367+amiralimi@users.noreply.github.com> Ananta Bastola +Andreas (Andi) Kunar Andrei Arjun Ashraful Islam @@ -18,42 +26,66 @@ Astariul <43774355+astariul@users.noreply.github.com> AsukaMinato Avi Lumelsky Bart Pelle <3662930+Velocity-@users.noreply.github.com> +Ben Ashbaugh Borislav Stanimirov Brad Ito +Brad Murray <59848399+bradmurray-dt@users.noreply.github.com> +Brian Bryan Lozano Carolinabanana <140120812+Carolinabanana@users.noreply.github.com> +CarterLi999 <664681047@qq.com> Cebtenzzre +Chen Xi +Chen Xi +Chris Elrod +Clint Herron +Conrad Kramer Cordeiro <1471463+ocordeiro@users.noreply.github.com> Cristiano Calcagno +DAN™ Dan Forbes Daniel Bevenius +Daniel Ziegenberg +Daniele <57776841+daniandtheweb@users.noreply.github.com> Daulet Zhanguzin +Dave +Dave Airlie +Dave Airlie David Miller +DavidKorczynski Davidson Francis +Dibakar Gope Didzis Gosko Diogo +Djip007 Dr. 
Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com> Ebey Abraham Eldar Yusupov +Emmanuel Durand Engininja2 <139037756+Engininja2@users.noreply.github.com> Erik Scholz Ettore Di Giacinto Eve <139727413+netrunnereve@users.noreply.github.com> F1L1P <78918286+F1L1Pv2@users.noreply.github.com> +Faisal Zaghloul FantasyGmm <16450052+FantasyGmm@users.noreply.github.com> Felix Finn Voorhees GainLee George Hindle Georgi Gerganov +Gilad S Guillaume Wenzek Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com> Herman Semenov Hirochika Matsumoto +Hong Bo PENG Hugo Rosenkranz-Costa Hyunsung Lee IGUILIZ Salah-Eddine <76955987+salahiguiliz@users.noreply.github.com> Ian Bull +Ikko Eltociear Ashimine +Ivan Filipov <159561759+vanaka11@users.noreply.github.com> Ivan Stepanov Ivan Zdane Jack Mousseau @@ -62,17 +94,23 @@ JacobLinCool Jakob Frick Jan Ploski Jared Van Bortel +Jeff Bolz Jeffrey Quesnelle +Jeroen Mostert Jiahao Li JidongZhang-THU <1119708529@qq.com> Jiří Podivín <66251151+jpodivin@users.noreply.github.com> Jo Liss +Joe Todd Johannes Gäßler John Balis Josh Bleecher Snyder Judd +Justina Cho Justine Tunney +Justine Tunney Kawrakow <48489457+ikawrakow@users.noreply.github.com> +Kevin Gibbons Konstantin Zhuravlyov Kylin <56434533+KyL0N@users.noreply.github.com> LoganDark @@ -80,17 +118,29 @@ LoganDark LostRuins <39025047+LostRuins@users.noreply.github.com> Lukas Möller M. 
Yusuf Sarıgöz +Mahesh Madhav <67384846+heshpdx@users.noreply.github.com> MaiHD +Mark Zhuang +Markus Tavenrath +Masaya, Kato <62578291+msy-kato@users.noreply.github.com> Mathijs de Bruin +Matt Stephenson +Max Krasnyansky Mayank Kumar Pal Meng, Hengyu +Mengqing Cao Metal Whale <45712559+metalwhale@users.noreply.github.com> Michael Klimenko Michael Podvitskiy Michael Verrilli +Molly Sophia +Natsu +Neo Zhang <14088817+arthw@users.noreply.github.com> Neo Zhang Jianyu Neuman Vong Nevin +Nicholai Tukanov +Nico Bosshard Nouamane Tazi Olivier Chafik Olivier Chafik @@ -101,6 +151,9 @@ Paul Tsochantaris Philpax Pierre Alexandre SCHEMBRI Playdev +Przemysław Pawełczyk +R0CKSTAR +R0CKSTAR Radoslav Gerganov Radosław Gryta Ravindra Marella @@ -109,15 +162,20 @@ Reinforce-II Reza Rezvan Rick G <26732651+TheFlipbook@users.noreply.github.com> RiverZhou +Ronsor +Rotem Dan Ryan Hitchman +Salvatore Mesoraca Sam Spilsbury Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com> Santtu Keskinen Sergio López Shijie <821898965@qq.com> Siddharth Ramakrishnan +Sigbjørn Skjæret Skyler Celestinian-Sterling <80314197+Celestinian@users.noreply.github.com> Slava Primenko +Srihari-mcw <96763064+Srihari-mcw@users.noreply.github.com> Steward Garcia <57494570+FSSRepo@users.noreply.github.com> Supreet Sethi Takuya Takeuchi @@ -127,38 +185,61 @@ Tanmay Sachan Timothy Cronin <40186632+4imothy@users.noreply.github.com> Tom Bailey Tom Jobbins <784313+TheBloke@users.noreply.github.com> +Tony Wasserka <4840017+neobrain@users.noreply.github.com> Tyé singwa <92231658+tye-singwa@users.noreply.github.com> UEXTM.com <84163508+uextm@users.noreply.github.com> WillCorticesAI <150854901+WillCorticesAI@users.noreply.github.com> +William Tambellini +William Tambellini XiaotaoChen +Xuan Son Nguyen Yavor Ivanov YavorGIvanov +Yilong Guo +Yilong Guo +agray3 apcameron <37645737+apcameron@users.noreply.github.com> appvoid <78444142+appvoid@users.noreply.github.com> ariez-xyz 
<41232910+ariez-xyz@users.noreply.github.com> automaticcat +bandoti <141645996+bandoti@users.noreply.github.com> bmwl bobqianic <129547291+bobqianic@users.noreply.github.com> bssrdf chengchi compilade <113953597+compilade@users.noreply.github.com> +compilade ddpasa <112642920+ddpasa@users.noreply.github.com> denersc dscripka fitzsim +fraxy-v <65565042+fraxy-v@users.noreply.github.com> goerch +goldwaving <77494627+goldwaving@users.noreply.github.com> hidenorly +hipudding hydai jaeminSon +jdomke <28772296+jdomke@users.noreply.github.com> +jiez <373447296@qq.com> johnson442 <56517414+johnson442@users.noreply.github.com> +junchao-loongson <68935141+junchao-loongson@users.noreply.github.com> +k.h.lai katsu560 <118887472+katsu560@users.noreply.github.com> klosax <131523366+klosax@users.noreply.github.com> +kunnis +l3utterfly le.chang leejet <31925346+leejet@users.noreply.github.com> leejet +liuwei-git <14815172+liuwei-git@users.noreply.github.com> +luoyu-intel magicse +mashizora <30516315+mashizora@users.noreply.github.com> +matteo ochafik otaGran +pengxin99 pikalover6 <49179590+pikalover6@users.noreply.github.com> postmasters sjinzh @@ -168,8 +249,15 @@ snadampal <87143774+snadampal@users.noreply.github.com> taher <8665427+nullhook@users.noreply.github.com> texmex76 <40733439+texmex76@users.noreply.github.com> the-crypt-keeper <84680712+the-crypt-keeper@users.noreply.github.com> +thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> +ucag.li ulatekh +wangshuai09 <391746016@qq.com> +woachk <24752637+woachk@users.noreply.github.com> yangyaofei +yuri@FreeBSD +zhentaoyu zhouwg <6889919+zhouwg@users.noreply.github.com> +zhouwg 布客飞龙 <562826179@qq.com> 旺旺碎冰冰 <38837039+Cyberhan123@users.noreply.github.com> diff --git a/src/ggml-vulkan.cpp b/src/ggml-vulkan.cpp index f9da45881..9345ea114 100644 --- a/src/ggml-vulkan.cpp +++ b/src/ggml-vulkan.cpp @@ -5008,6 +5008,8 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t } } + 
ggml_pipeline_allocate_descriptor_sets(ctx->device); + vk_buffer d_X = ggml_vk_create_buffer_check(ctx->device, sizeof(X_TYPE) * x_ne, vk::MemoryPropertyFlagBits::eDeviceLocal); vk_buffer d_Y = ggml_vk_create_buffer_check(ctx->device, sizeof(Y_TYPE) * y_ne, vk::MemoryPropertyFlagBits::eDeviceLocal); vk_buffer d_D = ggml_vk_create_buffer_check(ctx->device, sizeof(float) * d_ne, vk::MemoryPropertyFlagBits::eDeviceLocal); @@ -5124,7 +5126,9 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t avg_err /= m * n; - std::cerr << "TEST " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time / num_it << "ms avg_err=" << avg_err << std::endl; + double tflops = 2.0*m*n*k*batch*num_it / (time / 1000.0) / (1000.0*1000.0*1000.0*1000.0); + + std::cerr << "TEST " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time / num_it << "ms " << tflops << " TFLOPS avg_err=" << avg_err << std::endl; if (avg_err > 0.1) { std::cerr << "m = " << first_err_m << " n = " << first_err_n << " b = " << first_err_b << std::endl; @@ -5246,12 +5250,14 @@ static void ggml_vk_test_dequant(ggml_backend_vk_context * ctx, size_t ne, ggml_ ggml_pipeline_request_descriptor_sets(ctx->device, p, 1); + ggml_pipeline_allocate_descriptor_sets(ctx->device); + ggml_vk_buffer_write(qx_buf, 0, qx, qx_sz); vk_context subctx = ggml_vk_create_context(ctx, ctx->device->compute_queue); ggml_vk_ctx_begin(ctx->device, subctx); const std::vector<uint32_t> pc = { 1, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne }; - ggml_vk_dispatch_pipeline(ctx, subctx, p, { { qx_buf, 0, qx_sz }, { x_buf, 0, x_sz_f16 } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)ne, 1, 1}); + ggml_vk_dispatch_pipeline(ctx, subctx, p, { vk_subbuffer{ qx_buf, 0, qx_sz }, vk_subbuffer{ x_buf, 0, x_sz_f16 } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)ne, 1, 1}); ggml_vk_ctx_end(subctx);
auto begin = std::chrono::high_resolution_clock::now(); @@ -5378,6 +5384,8 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m, } } + ggml_pipeline_allocate_descriptor_sets(ctx->device); + ggml_vk_buffer_write(qx_buf, 0, qx, qx_sz); ggml_vk_buffer_write(y_buf, 0, y, y_sz); @@ -5445,7 +5453,9 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m, avg_err /= m * n; - std::cerr << "TEST MMQ " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time_ms / num_it << "ms avg_err=" << avg_err << std::endl; + double tflops = 2.0*m*n*k*batch*num_it / (time_ms / 1000.0) / (1000.0*1000.0*1000.0*1000.0); + + std::cerr << "TEST MMQ " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time_ms / num_it << "ms " << tflops << " TFLOPS avg_err=" << avg_err << std::endl; if (avg_err > 0.01 || std::isnan(avg_err)) { std::cerr << "m = " << first_err_m << " n = " << first_err_n << " b = " << first_err_b << std::endl; @@ -5497,9 +5507,6 @@ static ggml_tensor_extra_gpu * ggml_vk_tensor_create_extra(ggml_tensor * tensor) static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) { #if defined(GGML_VULKAN_RUN_TESTS) - ctx->staging = ggml_vk_create_buffer_check(ctx->device, 100ul * 1024ul * 1024ul, - vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached, - vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent); ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_F32); ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_Q4_0); ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_Q4_1);