Skip to content

Commit

Permalink
vulkan : fix build for GGML_VULKAN_RUN_TESTS, add TFLOPS to log (#961)
Browse files (browse the repository at this point in the history)
  • Loading branch information
jeffbolznv authored Sep 27, 2024
1 parent e6643c6 commit 3a423e6
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 7 deletions.
90 changes: 89 additions & 1 deletion AUTHORS
Original file line number Diff line number Diff line change
@@ -1,59 +1,91 @@
# date: Tue Apr 9 20:16:51 EEST 2024
# date: Thu Sep 26 09:19:50 CDT 2024
# this file is auto-generated by scripts/gen-authors.sh

0cc4m <[email protected]>
65a <[email protected]>
AT <[email protected]>
Abhilash Majumder <[email protected]>
Adam Tazi <[email protected]>
AidanBeltonS <[email protected]>
AidanBeltonS <[email protected]>
Akarshan Biswas <[email protected]>
Albert Jin <[email protected]>
Alberto Cabrera Pérez <[email protected]>
Alberto Cabrera Pérez <[email protected]>
Alex Azarov <[email protected]>
Alex O'Connell <[email protected]>
Alex von Gluck IV <[email protected]>
AmbientL <[email protected]>
AmirAli Mirian <[email protected]>
Ananta Bastola <[email protected]>
Andreas (Andi) Kunar <[email protected]>
Andrei <[email protected]>
Arjun <[email protected]>
Ashraful Islam <[email protected]>
Astariul <[email protected]>
AsukaMinato <[email protected]>
Avi Lumelsky <[email protected]>
Bart Pelle <[email protected]>
Ben Ashbaugh <[email protected]>
Borislav Stanimirov <[email protected]>
Brad Ito <[email protected]>
Brad Murray <[email protected]>
Brian <[email protected]>
Bryan Lozano <[email protected]>
Carolinabanana <[email protected]>
CarterLi999 <[email protected]>
Cebtenzzre <[email protected]>
Chen Xi <[email protected]>
Chen Xi <[email protected]>
Chris Elrod <[email protected]>
Clint Herron <[email protected]>
Conrad Kramer <[email protected]>
Cordeiro <[email protected]>
Cristiano Calcagno <[email protected]>
DAN™ <[email protected]>
Dan Forbes <[email protected]>
Daniel Bevenius <[email protected]>
Daniel Ziegenberg <[email protected]>
Daniele <[email protected]>
Daulet Zhanguzin <[email protected]>
Dave <[email protected]>
Dave Airlie <[email protected]>
Dave Airlie <[email protected]>
David Miller <[email protected]>
DavidKorczynski <[email protected]>
Davidson Francis <[email protected]>
Dibakar Gope <[email protected]>
Didzis Gosko <[email protected]>
Diogo <[email protected]>
Djip007 <[email protected]>
Dr. Tom Murphy VII Ph.D <[email protected]>
Ebey Abraham <[email protected]>
Eldar Yusupov <[email protected]>
Emmanuel Durand <[email protected]>
Engininja2 <[email protected]>
Erik Scholz <[email protected]>
Ettore Di Giacinto <[email protected]>
Eve <[email protected]>
F1L1P <[email protected]>
Faisal Zaghloul <[email protected]>
FantasyGmm <[email protected]>
Felix <[email protected]>
Finn Voorhees <[email protected]>
GainLee <[email protected]>
George Hindle <[email protected]>
Georgi Gerganov <[email protected]>
Gilad S <[email protected]>
Guillaume Wenzek <[email protected]>
Halalaluyafail3 <[email protected]>
Herman Semenov <[email protected]>
Hirochika Matsumoto <[email protected]>
Hong Bo PENG <[email protected]>
Hugo Rosenkranz-Costa <[email protected]>
Hyunsung Lee <[email protected]>
IGUILIZ Salah-Eddine <[email protected]>
Ian Bull <[email protected]>
Ikko Eltociear Ashimine <[email protected]>
Ivan Filipov <[email protected]>
Ivan Stepanov <[email protected]>
Ivan Zdane <[email protected]>
Jack Mousseau <[email protected]>
Expand All @@ -62,35 +94,53 @@ JacobLinCool <[email protected]>
Jakob Frick <[email protected]>
Jan Ploski <[email protected]>
Jared Van Bortel <[email protected]>
Jeff Bolz <[email protected]>
Jeffrey Quesnelle <[email protected]>
Jeroen Mostert <[email protected]>
Jiahao Li <[email protected]>
JidongZhang-THU <[email protected]>
Jiří Podivín <[email protected]>
Jo Liss <[email protected]>
Joe Todd <[email protected]>
Johannes Gäßler <[email protected]>
John Balis <[email protected]>
Josh Bleecher Snyder <[email protected]>
Judd <[email protected]>
Justina Cho <[email protected]>
Justine Tunney <[email protected]>
Justine Tunney <[email protected]>
Kawrakow <[email protected]>
Kevin Gibbons <[email protected]>
Konstantin Zhuravlyov <[email protected]>
Kylin <[email protected]>
LoganDark <[email protected]>
LoganDark <[email protected]>
LostRuins <[email protected]>
Lukas Möller <[email protected]>
M. Yusuf Sarıgöz <[email protected]>
Mahesh Madhav <[email protected]>
MaiHD <[email protected]>
Mark Zhuang <[email protected]>
Markus Tavenrath <[email protected]>
Masaya, Kato <[email protected]>
Mathijs de Bruin <[email protected]>
Matt Stephenson <[email protected]>
Max Krasnyansky <[email protected]>
Mayank Kumar Pal <[email protected]>
Meng, Hengyu <[email protected]>
Mengqing Cao <[email protected]>
Metal Whale <[email protected]>
Michael Klimenko <[email protected]>
Michael Podvitskiy <[email protected]>
Michael Verrilli <[email protected]>
Molly Sophia <[email protected]>
Natsu <[email protected]>
Neo Zhang <[email protected]>
Neo Zhang Jianyu <[email protected]>
Neuman Vong <[email protected]>
Nevin <[email protected]>
Nicholai Tukanov <[email protected]>
Nico Bosshard <[email protected]>
Nouamane Tazi <[email protected]>
Olivier Chafik <[email protected]>
Olivier Chafik <[email protected]>
Expand All @@ -101,6 +151,9 @@ Paul Tsochantaris <[email protected]>
Philpax <[email protected]>
Pierre Alexandre SCHEMBRI <[email protected]>
Playdev <[email protected]>
Przemysław Pawełczyk <[email protected]>
R0CKSTAR <[email protected]>
R0CKSTAR <[email protected]>
Radoslav Gerganov <[email protected]>
Radosław Gryta <[email protected]>
Ravindra Marella <[email protected]>
Expand All @@ -109,15 +162,20 @@ Reinforce-II <[email protected]>
Reza Rezvan <[email protected]>
Rick G <[email protected]>
RiverZhou <[email protected]>
Ronsor <[email protected]>
Rotem Dan <[email protected]>
Ryan Hitchman <[email protected]>
Salvatore Mesoraca <[email protected]>
Sam Spilsbury <[email protected]>
Sanchit Gandhi <[email protected]>
Santtu Keskinen <[email protected]>
Sergio López <[email protected]>
Shijie <[email protected]>
Siddharth Ramakrishnan <[email protected]>
Sigbjørn Skjæret <[email protected]>
Skyler Celestinian-Sterling <[email protected]>
Slava Primenko <[email protected]>
Srihari-mcw <[email protected]>
Steward Garcia <[email protected]>
Supreet Sethi <[email protected]>
Takuya Takeuchi <[email protected]>
Expand All @@ -127,38 +185,61 @@ Tanmay Sachan <[email protected]>
Timothy Cronin <[email protected]>
Tom Bailey <[email protected]>
Tom Jobbins <[email protected]>
Tony Wasserka <[email protected]>
Tyé singwa <[email protected]>
UEXTM.com <[email protected]>
WillCorticesAI <[email protected]>
William Tambellini <[email protected]>
William Tambellini <[email protected]>
XiaotaoChen <[email protected]>
Xuan Son Nguyen <[email protected]>
Yavor Ivanov <[email protected]>
YavorGIvanov <[email protected]>
Yilong Guo <[email protected]>
Yilong Guo <[email protected]>
agray3 <[email protected]>
apcameron <[email protected]>
appvoid <[email protected]>
ariez-xyz <[email protected]>
automaticcat <[email protected]>
bandoti <[email protected]>
bmwl <[email protected]>
bobqianic <[email protected]>
bssrdf <[email protected]>
chengchi <[email protected]>
compilade <[email protected]>
compilade <[email protected]>
ddpasa <[email protected]>
denersc <[email protected]>
dscripka <[email protected]>
fitzsim <[email protected]>
fraxy-v <[email protected]>
goerch <[email protected]>
goldwaving <[email protected]>
hidenorly <[email protected]>
hipudding <[email protected]>
hydai <[email protected]>
jaeminSon <[email protected]>
jdomke <[email protected]>
jiez <[email protected]>
johnson442 <[email protected]>
junchao-loongson <[email protected]>
k.h.lai <[email protected]>
katsu560 <[email protected]>
klosax <[email protected]>
kunnis <[email protected]>
l3utterfly <[email protected]>
le.chang <[email protected]>
leejet <[email protected]>
leejet <[email protected]>
liuwei-git <[email protected]>
luoyu-intel <[email protected]>
magicse <[email protected]>
mashizora <[email protected]>
matteo <[email protected]>
ochafik <[email protected]>
otaGran <[email protected]>
pengxin99 <[email protected]>
pikalover6 <[email protected]>
postmasters <[email protected]>
sjinzh <[email protected]>
Expand All @@ -168,8 +249,15 @@ snadampal <[email protected]>
taher <[email protected]>
texmex76 <[email protected]>
the-crypt-keeper <[email protected]>
thewh1teagle <[email protected]>
ucag.li <[email protected]>
ulatekh <[email protected]>
wangshuai09 <[email protected]>
woachk <[email protected]>
yangyaofei <[email protected]>
yuri@FreeBSD <yuri@FreeBSD>
zhentaoyu <[email protected]>
zhouwg <[email protected]>
zhouwg <[email protected]>
布客飞龙 <[email protected]>
旺旺碎冰冰 <[email protected]>
19 changes: 13 additions & 6 deletions src/ggml-vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5008,6 +5008,8 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
}
}

ggml_pipeline_allocate_descriptor_sets(ctx->device);

vk_buffer d_X = ggml_vk_create_buffer_check(ctx->device, sizeof(X_TYPE) * x_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
vk_buffer d_Y = ggml_vk_create_buffer_check(ctx->device, sizeof(Y_TYPE) * y_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
vk_buffer d_D = ggml_vk_create_buffer_check(ctx->device, sizeof(float) * d_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
Expand Down Expand Up @@ -5124,7 +5126,9 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t

avg_err /= m * n;

std::cerr << "TEST " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time / num_it << "ms avg_err=" << avg_err << std::endl;
double tflops = 2.0*m*n*k*batch*num_it / (time / 1000.0) / (1000.0*1000.0*1000.0*1000.0);

std::cerr << "TEST " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time / num_it << "ms " << tflops << " TFLOPS avg_err=" << avg_err << std::endl;

if (avg_err > 0.1) {
std::cerr << "m = " << first_err_m << " n = " << first_err_n << " b = " << first_err_b << std::endl;
Expand Down Expand Up @@ -5246,12 +5250,14 @@ static void ggml_vk_test_dequant(ggml_backend_vk_context * ctx, size_t ne, ggml_

ggml_pipeline_request_descriptor_sets(ctx->device, p, 1);

ggml_pipeline_allocate_descriptor_sets(ctx->device);

ggml_vk_buffer_write(qx_buf, 0, qx, qx_sz);

vk_context subctx = ggml_vk_create_context(ctx, ctx->device->compute_queue);
ggml_vk_ctx_begin(ctx->device, subctx);
const std::vector<uint32_t> pc = { 1, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne };
ggml_vk_dispatch_pipeline(ctx, subctx, p, { { qx_buf, 0, qx_sz }, { x_buf, 0, x_sz_f16 } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)ne, 1, 1});
ggml_vk_dispatch_pipeline(ctx, subctx, p, { vk_subbuffer{ qx_buf, 0, qx_sz }, vk_subbuffer{ x_buf, 0, x_sz_f16 } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)ne, 1, 1});
ggml_vk_ctx_end(subctx);

auto begin = std::chrono::high_resolution_clock::now();
Expand Down Expand Up @@ -5378,6 +5384,8 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m,
}
}

ggml_pipeline_allocate_descriptor_sets(ctx->device);

ggml_vk_buffer_write(qx_buf, 0, qx, qx_sz);
ggml_vk_buffer_write(y_buf, 0, y, y_sz);

Expand Down Expand Up @@ -5445,7 +5453,9 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m,

avg_err /= m * n;

std::cerr << "TEST MMQ " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time_ms / num_it << "ms avg_err=" << avg_err << std::endl;
double tflops = 2.0*m*n*k*batch*num_it / (time_ms / 1000.0) / (1000.0*1000.0*1000.0*1000.0);

std::cerr << "TEST MMQ " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time_ms / num_it << "ms " << tflops << " TFLOPS avg_err=" << avg_err << std::endl;

if (avg_err > 0.01 || std::isnan(avg_err)) {
std::cerr << "m = " << first_err_m << " n = " << first_err_n << " b = " << first_err_b << std::endl;
Expand Down Expand Up @@ -5497,9 +5507,6 @@ static ggml_tensor_extra_gpu * ggml_vk_tensor_create_extra(ggml_tensor * tensor)

static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) {
#if defined(GGML_VULKAN_RUN_TESTS)
ctx->staging = ggml_vk_create_buffer_check(ctx->device, 100ul * 1024ul * 1024ul,
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached,
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_F32);
ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_Q4_0);
ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_Q4_1);
Expand Down

0 comments on commit 3a423e6

Please sign in to comment.