Skip to content

Commit

Permalink
xe: jit: gemm: more large-tile FHS strategies
Browse files Browse the repository at this point in the history
  • Loading branch information
petercad committed Oct 22, 2024
1 parent 04ce01f commit 2be4e21
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion src/gpu/intel/jit/gemm/selector/db/kernel.db
Original file line number Diff line number Diff line change
Expand Up @@ -722,7 +722,9 @@ auto _CATALOG_ = kcatalog::toFlatCatalog({
{{'F', "gemm", {"F", "H", "S"}, {"N", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "#I"}, "aB32+m16@32 at16+m16@32 aB wg 2x4x4 kr cab3 ks32 xaf st vav hi pt sr br bk0 nb 2x4 grf256 sys sn l4 l2d", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 131072, 16777216}, {524288, 131072, 16777216}, {32, 8, 32}, {2, 4, 4}, 1, (WGType) 1, 261, 18432, 18432, {2, 2, 4}, {true, true, true}}, {'E', 17, {1.08698e+06, 601592, -687.022, 111689, 0, 0, 1.25555, 1.87166, 2.40426, 4.96622, 0.0272706, 0.0272706, 0, 0.946361, 1.27483, 0.962139, 2.75611e-12}}},
{{'F', "gemm", {"F", "H", "S"}, {"N", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, -1, -1}, {4, 4, 1}, "ABI"}, "aB16x2+m16@32 at16x2+m16@32 aB wg 1x16 xaf st vav li pt sr br sb64 bk0 grf256 sys kv afb", {16, (LoopType) 255, 256, {(LoopType) 224, (LoopType) 255, (LoopType) 255}, {262144, 262144, 16777216}, {262144, 262144, 32}, {16, 16, 16}, {1, 16, 1}, 1, (WGType) 1, 441, 0, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.03366e+06, 206195, 0, 0, 2.5985e+06, 2.90488e+06, 5.86262, 0.614358, 0.589806, 1.16158, 0.0300187, 0.00214889, 0.0294786, 0.604289, 1.43473, 0.822773, 7.94852e-12}}},
{{'F', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 4, 1}, "IB"}, "aB64 at32 aS wg 1x1x16 ikr af vav sr sb256 bk0 bm0 sys rr dm", {16, (LoopType) 255, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {16777216, 8192, 16777216}, {8192, 8192, 16777216}, {16, 16, 64}, {1, 1, 16}, 1, (WGType) 0, 4357, 0, 1024, {8, 4, 4}, {true, true, true}}, {'E', 17, {1.21597e+06, 165524, 28763.4, 9411.33, 0, 0, 0.278476, 0.599765, 0.920572, 5.21577, 0.021695, 0.0325353, 0.0148497, 1, 1.30279, 0.785158, 1.47381e-11}}},
{{'F', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABIp"}, "at32+m32@64 am32/16+m16@64 aB wg 8x4 xaf st vav hi pt sr br sb64 bk0 sm sn grf256 sys kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 1048576, 16777216}, {524288, 1048576, 32}, {32, 64, 32}, {8, 4, 1}, 1, (WGType) 1, 441, 0, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {875588, 627440, 0, 0, 5.68525e+06, 9.24058e+06, 0.613898, 0.645764, 0.795901, 1.22977, 0.00405493, 0.00405493, 0, 1, 1.86804, 1.21451, 2.52854e-12}}},
{{'F', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABIp"}, "at32+m32@64 am32/16+m16@64 aB wg 8x4 xaf st vav hi pt sr br sb64 bk0 sm sn grf256 sys kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 1048576, 16777216}, {524288, 1048576, 32}, {32, 64, 32}, {8, 4, 1}, 1, (WGType) 1, 441, 0, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {867949, 633849, 0, 0, 5.89824e+06, 1.00844e+07, 0.930403, 0.918787, 0.795424, 1.23078, 0.00404692, 0.00404692, 0, 0.990134, 1.69975, 1.14597, 2.34849e-12}}},
{{'F', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "IABs"}, "at32+m32@64 am32+m32@64 aB wg 8x4 xaf st vav hi pt sr br sb64 bk0 sm sn grf256 sys kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 917504, 16777216}, {524288, 917504, 32}, {32, 56, 32}, {8, 4, 1}, 1, (WGType) 1, 441, 0, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {871124, 600159, 0, 0, 5.72375e+06, 8.32307e+06, 0.555762, 0.650262, 0.789585, 1.21694, 0.00411938, 0.00411938, 0, 1, 1.69975, 1.10825, 2.61032e-12}}},
{{'F', "gemm", {"F", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "IABs"}, "at64x2+m64@64 am32+m32@64 aB wg 8x4 xaf fx vav hi pt sr br sb64 bk0 sm sn grf256 sys kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 786432, 16777216}, {524288, 786432, 64}, {32, 48, 64}, {8, 4, 1}, 1, (WGType) 1, 441, 0, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {866537, 568292, 0, 0, 4.29425e+06, 7.31546e+06, 0.475793, 0.638791, 0.753591, 1.18019, 0.0041899, 0.0041899, 0, 1, 1.78867, 1.11209, 1.76449e-12}}},
{{'F', "gemm", {"F", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aB8x2+B8@16 aS8+S8@24 aB wg 1x4 kc8 nse hi pt sr sb256 bk0 sn", {16, (LoopType) 255, 128, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 131072, 16777216}, {524288, 131072, 16777216}, {16, 8, 8}, {1, 4, 1}, 1, (WGType) 1, 257, 0, 0, {1, 4, 4}, {true, true, true}}, {'E', 17, {1.16538e+06, 40635.2, 0, 0, 0, 0, 1.30731, 1.53858, 0.584971, 1.42067, 0.0634061, 0.0581975, 0.0161667, 1, 1.44276, 1.00478, 2.34818e-11}}},
{{'F', "gemm", {"F", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, ""}, "aB8x2+B16@8 aS8x2+S32@8 aB wg 2x8x2 kr kc8 nse hi pt sr kv sb256 bk0 sn grf256 afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {1048576, 262144, 16777216}, {8192, 8192, 0}, {32, 16, 8}, {2, 8, 2}, 1, (WGType) 1, 413, 0, 65536, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.12483e+06, -574330, -20527.6, 853244, 4.03046e+06, 0, 2.33007, 1.51079, 0.80632, 1.51643, 0.0625462, -1.54388e-05, 0.0629715, 0.425906, 1.06028, 0.229788, 1.06051e-11}}},
{{'F', "gemm", {"F", "S", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, 4, -1}, {1, 1, 1}, ""}, "am8+B8@8 at8x2 aB wg 4x1x16 kr kc8 nse li nmk pt sr bk0 sn kv afb sb32 l2d", {16, (LoopType) 255, 128, {(LoopType) 225, (LoopType) 255, (LoopType) 2}, {524288, 65536, 16777216}, {8192, 8192, 0}, {16, 4, 8}, {4, 1, 16}, 1, (WGType) 1, 413, 0, 2048, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.21115e+06, -59718.2, -11600.6, 104303, 3.06381e+06, 0, 0.860506, 9.14342, 0.769527, 1.14843, 0.0733058, 0.0350639, 0.04512, 0.901895, 1.307, 0.986093, 2.06541e-11}}},
Expand Down

0 comments on commit 2be4e21

Please sign in to comment.