From d5298d233e6e44f5071df31a22292391191ed1e4 Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Fri, 23 Jan 2026 00:05:56 +0000 Subject: [PATCH 01/10] Update gfx950 quick-tune lists for gemm and conv. --- .../Rock/Tuning/QuickTuningPerfconfigs.inc | 369 ++++++++++++------ 1 file changed, 258 insertions(+), 111 deletions(-) diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc index 18396340730e..8a0f0f97eea0 100644 --- a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc +++ b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc @@ -371,12 +371,16 @@ const StringRef PopulateParamsXDL::initParametersF32GemmGfx942[] = { // BEGIN_GEMM_XDL_f32_gfx950_DEFS const StringRef PopulateParamsXDL::initParametersF32GemmGfx950[] = { - "v4:32,64,8,32,16,16,8,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,128,8,16,128,16,4,1,2,2,0,0,1,1", - "v4:16,32,8,16,16,16,4,1,2,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,16,8,16,16,16,4,1,2,2,0,0,1,1" + "v4:16,16,8,16,16,16,8,1,4,2,0,0,1,1", + "v4:64,32,4,32,16,16,8,1,4,2,0,0,1,1", + "v4:64,64,4,64,16,16,4,1,4,2,0,0,1,1", + "v4:32,32,4,16,16,16,8,1,4,2,0,0,1,1", + "v4:16,16,4,16,16,16,32,1,4,2,0,0,1,1", + "v4:128,256,4,128,64,16,4,1,2,2,0,0,1,1", + "v4:32,16,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:32,64,8,32,16,16,8,1,4,2,0,0,1,1", + "v4:64,16,8,64,16,16,1,1,1,2,0,0,1,1", + "v4:16,128,4,16,16,16,16,1,4,2,0,0,1,1" }; // END_GEMM_XDL_f32_gfx950_DEFS @@ -456,39 +460,66 @@ const StringRef PopulateParamsXDL::initParametersF32ConvGfx942[] = { // BEGIN_CONV_XDL_f32_gfx950_DEFS const StringRef PopulateParamsXDL::initParametersF32ConvGfx950[] = { - "v4:128,128,4,128,32,16,1,1,1,2,0,0,1,1", - "v4:32,64,8,16,32,16,8,1,1,2,0,0,1,1", - "v4:64,64,8,16,64,16,8,1,1,2,0,0,1,1", - "v4:64,32,8,16,32,16,4,1,1,2,0,0,1,1", + "v4:64,32,8,32,16,16,4,1,4,2,0,0,1,1", + "v4:32,32,4,16,16,16,8,1,4,2,0,0,1,1", + "v4:16,16,4,16,16,16,8,1,4,2,0,0,1,1", + "v4:32,16,8,16,16,16,16,1,4,2,0,0,1,1", + "v4:64,16,4,16,16,16,16,1,4,2,0,0,1,1", + "v4:16,16,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:128,16,8,32,16,16,4,1,4,2,0,0,1,1", + "v4:64,32,4,32,16,16,8,1,2,2,0,0,1,1", + "v4:16,32,4,16,16,16,8,1,2,2,0,0,1,1", + "v4:32,16,8,16,16,16,4,1,2,2,0,0,1,1", "v4:32,32,8,16,16,16,4,1,2,2,0,0,1,1", "v4:64,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:128,32,4,64,16,16,8,1,1,2,0,0,1,1", - "v4:16,32,4,16,16,16,4,1,2,2,0,0,1,1", - "v4:256,32,4,128,16,16,4,1,1,2,0,0,1,1", - "v4:128,32,8,32,32,16,4,1,1,2,0,0,1,1", - "v4:16,16,8,16,16,16,1,1,1,2,0,0,1,1", - "v4:16,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,4,1,2,2,0,0,1,1", - "v4:64,64,4,64,16,16,4,1,2,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,4,64,16,16,1,1,1,2,0,0,1,1", - "v4:128,64,4,128,16,16,4,1,1,2,0,0,1,1", - "v4:64,64,4,32,32,16,8,1,1,2,0,0,1,1", - "v4:64,32,4,16,32,16,4,1,2,2,0,0,1,1", - "v4:16,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:128,128,4,128,32,16,1,1,2,2,0,0,1,1", - "v4:32,32,4,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,32,2,32,32,32,1,1,2,2,0,0,1,1", - "v4:32,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:128,32,4,32,32,32,8,1,2,2,0,0,1,1", - "v4:32,64,8,32,16,16,8,1,1,2,0,0,1,1", - "v4:128,256,2,128,64,32,4,1,2,2,0,0,1,1", + "v4:16,32,8,16,32,16,4,1,4,2,0,0,1,1", + "v4:32,16,8,16,16,16,16,1,3,2,0,0,1,1", + "v4:64,64,8,16,32,16,4,1,2,2,0,0,1,1", + "v4:64,64,4,32,32,16,4,1,1,2,0,0,1,1", "v4:64,128,4,64,32,16,4,1,1,2,0,0,1,1", - "v4:32,32,8,16,16,16,4,1,1,2,0,0,1,1", - "v4:64,64,2,64,32,32,1,1,2,2,0,0,1,1", - "v4:64,32,8,32,16,16,4,1,2,2,0,0,1,1" + "v4:64,16,4,64,16,16,8,1,4,2,0,0,1,1", + "v4:128,32,4,32,32,16,8,1,4,2,0,0,1,1", + "v4:128,128,4,128,128,16,1,1,4,2,0,0,1,1", + "v4:32,64,4,16,32,16,4,1,2,2,0,0,1,1", + "v4:256,64,4,256,16,16,4,1,4,2,0,0,1,1", + "v4:128,64,8,128,64,16,1,1,4,2,0,0,1,1", + "v4:64,128,4,64,16,16,4,1,2,2,0,0,1,1", + "v4:128,64,4,64,32,16,8,1,1,2,0,0,1,1", + "v4:128,128,4,128,32,16,4,1,2,2,0,0,1,1", + "v4:32,64,4,32,64,16,4,1,1,2,0,0,1,1", + "v4:64,32,4,64,16,16,4,1,2,2,0,0,1,1", + "v4:64,64,8,32,32,16,4,1,4,2,0,0,1,1", + "v4:64,32,4,64,16,16,4,1,4,2,0,0,1,1", + "v4:128,64,4,128,64,16,4,1,4,2,0,0,1,1", + "v4:32,128,4,32,16,16,4,1,2,2,0,0,1,1", + "v4:32,128,2,32,32,32,4,1,2,2,0,0,1,1", + "v4:32,32,4,32,32,32,8,1,2,2,0,0,1,1", + "v4:64,256,4,64,64,32,1,1,4,2,0,0,1,1", + "v4:128,256,4,128,128,16,1,1,4,2,0,0,1,1", + "v4:32,256,4,32,64,16,4,1,3,2,0,0,1,1", + "v4:64,32,4,64,32,16,8,1,4,2,0,0,1,1", + "v4:192,64,4,96,32,16,8,1,4,2,0,0,1,1", + "v4:48,16,4,48,16,16,8,1,1,2,0,0,1,1", + "v4:256,256,8,256,16,16,1,1,2,2,0,0,1,1", + "v4:256,32,4,64,32,16,4,1,1,2,0,0,1,1", + "v4:64,32,4,32,32,32,4,1,2,2,0,0,1,1", + "v4:64,256,4,64,32,32,4,1,3,2,0,0,1,1", + "v4:128,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:128,16,8,64,16,16,4,1,1,2,0,0,1,1", + "v4:128,256,4,128,16,16,8,1,1,2,0,0,1,1", + "v4:160,80,8,80,80,16,4,1,4,2,0,0,1,1", + "v4:192,64,8,48,32,16,4,1,1,2,0,0,1,1", + "v4:128,160,8,32,160,32,4,1,1,2,0,0,1,1", + "v4:16,16,4,16,16,16,4,1,2,0,1,4,1,1", + "v4:128,48,8,16,48,16,4,1,2,2,0,0,1,1", + "v4:96,16,8,96,16,16,4,1,4,2,0,0,1,1", + "v4:96,64,4,48,16,16,4,1,4,1,0,8,1,1", + "v4:16,64,4,16,32,16,4,1,2,0,1,64,1,1", + "v4:192,32,8,48,32,16,4,1,4,1,4,64,1,1", + "v4:192,32,8,96,32,16,4,1,4,1,1,32,1,1", + "v4:48,16,4,48,16,16,4,1,4,0,8,4,1,1", + "v4:64,16,4,16,16,16,16,1,4,1,8,0,1,1", + "v4:80,16,8,80,16,16,4,1,4,0,4,32,1,1" }; // END_CONV_XDL_f32_gfx950_DEFS @@ -564,20 +595,34 @@ const StringRef PopulateParamsXDL::initParametersF16GemmGfx942[] = { // BEGIN_GEMM_XDL_f16_gfx950_DEFS const StringRef PopulateParamsXDL::initParametersF16GemmGfx950[] = { - "v4:32,64,8,16,32,16,8,1,2,2,0,0,1,1", - "v4:16,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:32,64,8,32,16,16,8,1,2,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,64,8,32,32,32,8,1,1,2,0,0,1,1", - "v4:64,64,8,32,32,32,8,1,2,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:32,16,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:16,32,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,32,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,64,8,32,32,32,16,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,32,1,4,2,0,0,1,1", + "v4:32,32,4,32,16,16,16,1,4,2,0,0,1,1", "v4:128,64,8,32,64,16,8,1,2,2,0,0,1,1", - "v4:128,128,8,64,64,16,8,1,1,2,0,0,1,1", - "v4:256,128,4,128,64,32,8,1,2,2,0,0,1,1", - "v4:32,128,4,16,64,16,4,1,2,2,0,0,1,1", - "v4:32,128,8,32,32,32,8,1,1,2,0,0,1,1" + "v4:32,64,4,32,16,16,16,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,16,1,3,2,0,0,1,1", + "v4:128,128,8,32,64,32,8,1,2,2,0,0,1,1", + "v4:64,128,8,32,32,32,8,1,2,2,0,0,1,1", + "v4:16,16,4,16,16,16,16,1,3,2,0,0,1,1", + "v4:64,64,8,16,32,16,16,1,1,2,0,0,1,1", + "v4:64,16,4,32,16,16,16,1,2,2,0,0,1,1", + "v4:64,64,4,64,32,32,8,1,4,2,0,0,1,1", + "v4:32,64,4,32,32,32,8,1,4,2,0,0,1,1", + "v4:32,64,8,32,16,16,8,1,4,2,0,0,1,1", + "v4:16,64,4,16,16,16,32,1,4,2,0,0,1,1", + "v4:32,128,4,32,64,16,16,1,2,2,0,0,1,1", + "v4:256,128,8,64,64,32,8,1,2,2,0,0,1,1", + "v4:128,128,4,128,64,16,8,1,4,2,0,0,1,1", + "v4:128,256,2,128,64,32,8,1,4,2,0,0,1,1", + "v4:256,128,4,256,32,16,8,1,4,2,0,0,1,1", + "v4:128,128,4,128,64,16,4,1,2,2,0,0,1,1", + "v4:256,128,8,64,128,16,8,1,1,2,0,0,1,1", + "v4:256,256,4,64,64,32,8,1,2,2,0,0,1,1", + "v4:256,256,2,128,64,32,16,1,2,2,0,0,1,1", + "v4:128,256,8,128,32,32,8,1,1,2,0,0,1,1", + "v4:96,48,8,48,48,16,8,1,4,1,2,4,1,1" }; // END_GEMM_XDL_f16_gfx950_DEFS @@ -647,32 +692,87 @@ const StringRef PopulateParamsXDL::initParametersF16ConvGfx942[] = { // BEGIN_CONV_XDL_f16_gfx950_DEFS const StringRef PopulateParamsXDL::initParametersF16ConvGfx950[] = { - "v4:64,128,8,32,64,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,32,16,16,8,1,2,2,0,0,1,1", - "v4:32,64,8,32,32,32,1,1,1,2,0,0,1,1", - "v4:128,128,8,64,64,32,8,1,2,2,0,0,1,1", - "v4:32,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,64,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,64,8,32,16,16,4,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,16,8,32,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,256,8,16,256,16,4,1,2,2,0,0,1,1", - "v4:128,128,4,128,32,16,8,1,1,2,0,0,1,1", - "v4:64,256,2,64,64,32,4,1,1,2,0,0,1,1", - "v4:32,64,8,32,16,16,8,1,2,2,0,0,1,1", - "v4:64,256,2,32,128,32,4,1,1,2,0,0,1,1", - "v4:64,64,8,64,16,16,8,1,2,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", "v4:64,64,8,32,32,32,8,1,2,2,0,0,1,1", - "v4:64,64,4,32,32,32,8,1,2,2,0,0,1,1", + "v4:64,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:128,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:32,16,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,32,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:16,32,4,16,16,16,16,1,1,2,0,0,1,1", + "v4:64,64,4,32,64,16,16,1,2,2,0,0,1,1", + "v4:256,16,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,64,4,32,32,32,16,1,1,2,0,0,1,1", + "v4:16,32,8,16,32,16,4,1,1,2,0,0,1,1", + "v4:16,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:64,64,2,64,64,32,4,1,1,2,0,0,1,1", + "v4:64,32,4,64,32,16,8,1,1,2,0,0,1,1", + "v4:16,64,8,16,32,16,4,1,2,2,0,0,1,1", + "v4:32,256,2,32,64,32,4,1,1,2,0,0,1,1", + "v4:128,64,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:32,32,4,32,16,16,8,1,2,2,0,0,1,1", + "v4:128,128,4,64,64,16,8,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:32,128,4,32,64,32,8,1,2,2,0,0,1,1", + "v4:64,16,4,64,16,16,16,1,1,2,0,0,1,1", + "v4:64,64,8,64,32,16,4,1,2,2,0,0,1,1", + "v4:64,64,8,32,16,16,16,1,2,2,0,0,1,1", + "v4:128,128,2,128,32,32,16,1,1,2,0,0,1,1", + "v4:32,64,4,32,16,16,4,1,1,2,0,0,1,1", + "v4:128,64,4,128,32,32,8,1,1,2,0,0,1,1", + "v4:256,128,4,128,64,32,8,1,1,2,0,0,1,1", + "v4:32,128,8,32,32,32,16,1,2,2,0,0,1,1", + "v4:64,32,8,32,16,16,8,1,2,2,0,0,1,1", "v4:32,32,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:32,64,8,16,32,16,4,1,2,2,0,0,1,1", + "v4:256,64,2,64,64,32,16,1,2,2,0,0,1,1", + "v4:32,128,4,32,128,16,8,1,1,2,0,0,1,1", + "v4:64,32,4,16,32,16,16,1,1,2,0,0,1,1", + "v4:128,32,4,32,32,16,16,1,2,2,0,0,1,1", + "v4:256,64,4,256,16,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,32,32,16,4,1,2,2,0,0,1,1", + "v4:128,128,4,64,32,16,16,1,2,2,0,0,1,1", + "v4:128,32,4,32,32,32,16,1,1,2,0,0,1,1", + "v4:128,32,4,64,32,16,8,1,2,2,0,0,1,1", + "v4:16,64,8,16,64,16,16,1,1,2,0,0,1,1", + "v4:256,128,8,64,64,32,8,1,2,2,0,0,1,1", + "v4:64,64,4,16,16,16,4,1,2,2,0,0,1,1", + "v4:256,32,4,32,16,16,8,1,2,2,0,0,1,1", + "v4:256,64,8,64,32,32,8,1,2,2,0,0,1,1", + "v4:192,32,4,48,32,16,16,1,1,2,0,0,1,1", + "v4:32,64,4,32,16,16,4,1,3,2,0,0,1,1", + "v4:256,32,8,64,32,16,8,1,2,2,0,0,1,1", + "v4:256,128,4,64,64,16,16,1,1,2,0,0,1,1", + "v4:128,32,8,64,32,32,4,1,1,2,0,0,1,1", + "v4:256,256,8,256,16,16,4,1,2,2,0,0,1,1", + "v4:48,32,8,48,16,16,8,1,2,2,0,0,1,1", + "v4:16,16,4,16,16,16,8,1,2,0,1,4,1,1", + "v4:32,80,8,16,80,16,8,1,2,2,0,0,1,1", + "v4:48,48,4,48,48,16,8,1,2,0,1,32,1,1", + "v4:192,32,4,96,16,16,16,1,1,0,4,32,1,1", + "v4:192,64,8,96,32,32,8,1,2,2,0,0,1,1", + "v4:256,256,8,64,64,32,8,1,1,2,0,0,1,1", + "v4:128,256,4,64,128,16,16,1,2,2,0,0,1,1", + "v4:192,256,2,192,64,32,4,1,1,2,0,0,1,1", + "v4:192,32,8,96,16,16,8,1,2,2,0,0,1,1", + "v4:256,32,8,256,16,16,8,1,1,2,0,0,1,1", "v4:256,32,8,64,32,32,8,1,2,2,0,0,1,1", - "v4:32,128,4,32,32,32,8,1,1,2,0,0,1,1", - "v4:128,32,8,32,32,32,8,1,2,2,0,0,1,1", - "v4:256,128,4,128,64,32,8,1,2,2,0,0,1,1", - "v4:256,64,8,64,64,32,8,1,2,2,0,0,1,1", - "v4:32,32,2,32,32,32,4,1,1,2,0,0,1,1" + "v4:256,64,4,128,32,32,16,1,1,1,2,32,1,1", + "v4:96,16,8,48,16,16,8,1,2,2,0,0,1,1", + "v4:96,256,8,96,32,32,4,1,2,2,0,0,1,1", + "v4:96,64,8,48,32,16,16,1,2,0,4,4,1,1", + "v4:32,16,8,16,16,16,16,1,2,1,4,32,1,1", + "v4:64,96,8,32,96,16,8,1,2,1,4,4,1,1", + "v4:128,64,8,32,32,16,8,1,2,1,2,16,1,1", + "v4:16,128,8,16,64,16,32,1,4,2,0,0,1,1", + "v4:16,16,8,16,16,16,8,1,1,0,4,16,1,1", + "v4:192,32,8,48,32,16,8,1,2,1,8,64,1,1", + "v4:256,128,2,64,64,32,16,1,2,0,1,8,1,1", + "v4:256,128,4,128,64,16,8,1,1,1,8,8,1,1", + "v4:256,32,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:64,192,4,32,96,32,8,1,2,0,8,32,1,1", + "v4:96,16,4,96,16,16,8,1,1,1,1,8,1,1", + "v4:96,48,4,96,48,16,8,1,1,0,0,8,1,1", + "v4:96,64,4,48,16,16,16,1,2,0,1,0,1,1" }; // END_CONV_XDL_f16_gfx950_DEFS @@ -776,20 +876,45 @@ const StringRef PopulateParamsXDL::initParametersI8GemmGfx942[] = { // BEGIN_GEMM_XDL_i8_gfx950_DEFS const StringRef PopulateParamsXDL::initParametersI8GemmGfx950[] = { - "v4:64,64,16,32,32,32,16,1,1,2,0,0,1,1", - "v4:32,64,16,32,16,16,16,1,2,2,0,0,1,1", - "v4:32,16,16,16,16,16,16,1,1,2,0,0,1,1", - "v4:64,64,32,32,32,32,16,1,2,2,0,0,1,1", - "v4:128,64,16,32,64,32,16,1,1,2,0,0,1,1", - "v4:64,64,4,32,32,32,8,1,1,2,0,0,1,1", - "v4:128,128,8,128,32,16,8,1,2,2,0,0,1,1", - "v4:32,16,16,16,16,16,8,1,1,2,0,0,1,1", - "v4:128,128,8,32,128,16,16,1,1,2,0,0,1,1", - "v4:64,16,32,16,16,16,16,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,16,1,2,2,0,0,1,1", - "v4:16,16,16,16,16,16,16,1,2,2,0,0,1,1", - "v4:32,32,32,16,16,16,16,1,1,2,0,0,1,1", - "v4:16,16,32,16,16,16,16,1,1,2,0,0,1,1" + "v4:16,16,8,16,16,16,16,1,4,2,0,0,1,1", + "v4:32,16,32,16,16,16,16,1,1,2,0,0,1,1", + "v4:16,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:16,64,4,16,16,16,16,1,4,2,0,0,1,1", + "v4:64,32,32,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,16,16,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,64,32,16,8,1,2,2,0,0,1,1", + "v4:64,64,16,32,32,16,16,1,1,2,0,0,1,1", + "v4:128,128,8,64,32,32,16,1,2,2,0,0,1,1", + "v4:64,128,8,16,64,16,16,1,2,2,0,0,1,1", + "v4:128,128,4,128,64,32,16,1,4,2,0,0,1,1", + "v4:128,16,8,64,16,16,16,1,2,2,0,0,1,1", + "v4:128,64,16,32,32,32,16,1,2,2,0,0,1,1", + "v4:256,64,8,64,64,32,16,1,1,2,0,0,1,1", + "v4:128,64,32,128,64,32,1,1,4,2,0,0,1,1", + "v4:16,32,8,16,32,16,8,1,3,2,0,0,1,1", + "v4:256,128,4,256,32,16,16,1,4,2,0,0,1,1", + "v4:32,128,8,16,64,16,16,1,1,2,0,0,1,1", + "v4:256,32,32,128,32,32,1,1,4,2,0,0,1,1", + "v4:64,64,16,32,32,16,8,1,1,2,0,0,1,1", + "v4:128,256,4,32,256,32,16,1,2,2,0,0,1,1", + "v4:128,64,16,64,32,32,16,1,2,2,0,0,1,1", + "v4:32,128,16,32,32,32,16,1,1,2,0,0,1,1", + "v4:64,64,16,32,64,16,16,1,1,2,0,0,1,1", + "v4:64,128,8,32,128,16,8,1,3,2,0,0,1,1", + "v4:256,256,8,256,32,16,16,1,2,2,0,0,1,1", + "v4:64,256,4,64,64,32,8,1,4,2,0,0,1,1", + "v4:64,128,16,64,32,32,16,1,1,2,0,0,1,1", + "v4:64,256,8,64,32,32,8,1,2,2,0,0,1,1", + "v4:64,64,32,16,32,16,16,1,2,2,0,0,1,1", + "v4:128,32,32,32,32,32,16,1,2,2,0,0,1,1", + "v4:192,64,8,96,32,32,16,1,2,2,0,0,1,1", + "v4:128,128,16,64,32,32,16,1,1,2,0,0,1,1", + "v4:256,128,32,128,32,16,1,1,1,2,0,0,1,1", + "v4:128,128,32,16,64,16,16,1,1,2,0,0,1,1", + "v4:128,256,8,128,32,16,16,1,4,2,0,0,1,1", + "v4:256,128,16,32,128,32,16,1,1,2,0,0,1,1", + "v4:128,128,16,32,64,16,16,1,2,1,1,4,1,1", + "v4:256,256,16,32,128,16,8,1,1,2,0,0,1,1" }; // END_GEMM_XDL_i8_gfx950_DEFS @@ -857,27 +982,49 @@ const StringRef PopulateParamsXDL::initParametersI8ConvGfx942[] = { // BEGIN_CONV_XDL_i8_gfx950_DEFS const StringRef PopulateParamsXDL::initParametersI8ConvGfx950[] = { - "v4:64,32,8,16,32,16,8,1,1,2,0,0,1,1", - "v4:32,32,16,32,32,32,1,1,1,2,0,0,1,1", - "v4:64,128,16,64,32,32,1,1,2,2,0,0,1,1", - "v4:128,16,8,64,16,16,16,1,2,2,0,0,1,1", - "v4:256,32,8,64,32,16,16,1,2,2,0,0,1,1", - "v4:64,64,32,32,32,16,1,1,1,2,0,0,1,1", - "v4:64,16,16,16,16,16,16,1,2,2,0,0,1,1", - "v4:32,16,32,16,16,16,16,1,2,2,0,0,1,1", - "v4:128,128,4,64,64,32,16,1,1,2,0,0,1,1", - "v4:256,64,4,128,32,32,16,1,2,2,0,0,1,1", + "v4:32,16,8,32,16,16,16,1,4,2,0,0,1,1", "v4:64,16,8,32,16,16,16,1,2,2,0,0,1,1", - "v4:64,256,4,64,64,32,8,1,2,2,0,0,1,1", - "v4:64,64,4,64,16,16,16,1,1,2,0,0,1,1", - "v4:64,32,4,32,16,16,16,1,2,2,0,0,1,1", + "v4:128,32,4,64,16,16,16,1,4,2,0,0,1,1", + "v4:128,128,8,128,16,16,8,1,4,2,0,0,1,1", + "v4:32,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,64,16,32,32,32,8,1,3,2,0,0,1,1", + "v4:64,64,4,32,64,16,16,1,1,2,0,0,1,1", + "v4:128,32,4,128,32,16,16,1,2,2,0,0,1,1", + "v4:32,32,16,32,32,16,8,1,2,2,0,0,1,1", + "v4:64,128,32,32,32,32,1,1,4,2,0,0,1,1", + "v4:128,16,8,64,16,16,16,1,2,2,0,0,1,1", + "v4:64,32,4,64,32,16,8,1,2,2,0,0,1,1", + "v4:32,64,32,32,32,32,1,1,1,2,0,0,1,1", + "v4:128,32,4,64,32,16,8,1,1,2,0,0,1,1", + "v4:32,64,32,32,32,32,1,1,2,2,0,0,1,1", + "v4:64,256,4,64,64,16,8,1,2,2,0,0,1,1", + "v4:64,64,32,32,64,32,1,1,3,2,0,0,1,1", "v4:64,64,16,64,32,32,1,1,2,2,0,0,1,1", - "v4:16,32,32,16,16,16,16,1,2,2,0,0,1,1", - "v4:32,16,16,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,16,16,16,16,16,8,1,2,2,0,0,1,1", - "v4:256,128,8,64,128,32,16,1,1,2,0,0,1,1", - "v4:64,256,4,64,64,32,8,1,1,2,0,0,1,1", - "v4:32,32,8,16,16,16,16,1,1,2,0,0,1,1" + "v4:32,256,4,32,64,32,8,1,1,2,0,0,1,1", + "v4:128,128,4,128,32,16,16,1,1,2,0,0,1,1", + "v4:256,64,4,64,64,16,16,1,2,2,0,0,1,1", + "v4:128,256,8,128,64,32,8,1,2,2,0,0,1,1", + "v4:64,64,8,64,64,32,8,1,2,2,0,0,1,1", + "v4:32,128,16,32,64,32,8,1,4,2,0,0,1,1", + "v4:32,64,8,32,64,32,16,1,1,2,0,0,1,1", + "v4:128,128,8,32,64,16,16,1,2,2,0,0,1,1", + "v4:256,32,8,64,32,32,16,1,2,2,0,0,1,1", + "v4:256,64,8,64,32,32,16,1,2,2,0,0,1,1", + "v4:128,128,16,64,16,16,16,1,1,2,0,0,1,1", + "v4:128,32,32,32,32,16,16,1,1,2,0,0,1,1", + "v4:256,128,8,64,64,32,16,1,1,2,0,0,1,1", + "v4:32,128,4,16,128,16,8,1,3,2,0,0,1,1", + "v4:64,64,16,32,64,32,16,1,1,2,0,0,1,1", + "v4:128,256,4,32,128,32,16,1,2,2,0,0,1,1", + "v4:256,32,8,128,16,16,16,1,1,2,0,0,1,1", + "v4:256,64,4,64,32,32,16,1,2,2,0,0,1,1", + "v4:128,64,8,128,64,32,16,1,2,2,0,0,1,1", + "v4:256,16,4,16,16,16,16,1,2,0,1,4,1,1", + "v4:256,32,8,32,16,16,16,1,2,0,1,64,1,1", + "v4:256,80,8,16,80,16,8,1,2,0,4,32,1,1", + "v4:32,64,16,16,16,16,16,1,4,2,0,0,1,1", + "v4:32,64,16,16,32,16,16,1,4,2,0,0,1,1", + "v4:64,16,32,64,16,16,16,1,2,2,0,0,1,1" }; // END_CONV_XDL_i8_gfx950_DEFS @@ -901,7 +1048,7 @@ static const StringRef initParametersF32GemmGfx942[nInitParametersF32GemmGfx942] // END_GEMM_XDL_f32_gfx942_DECS // BEGIN_GEMM_XDL_f32_gfx950_DECS -static constexpr size_t nInitParametersF32GemmGfx950 = 6; +static constexpr size_t nInitParametersF32GemmGfx950 = 10; static const StringRef initParametersF32GemmGfx950[nInitParametersF32GemmGfx950]; // END_GEMM_XDL_f32_gfx950_DECS @@ -921,7 +1068,7 @@ static const StringRef initParametersF32ConvGfx942[nInitParametersF32ConvGfx942] // END_CONV_XDL_f32_gfx942_DECS // BEGIN_CONV_XDL_f32_gfx950_DECS -static constexpr size_t nInitParametersF32ConvGfx950 = 33; +static constexpr size_t nInitParametersF32ConvGfx950 = 60; static const StringRef initParametersF32ConvGfx950[nInitParametersF32ConvGfx950]; // END_CONV_XDL_f32_gfx950_DECS @@ -941,7 +1088,7 @@ static const StringRef initParametersF16GemmGfx942[nInitParametersF16GemmGfx942] // END_GEMM_XDL_f16_gfx942_DECS // BEGIN_GEMM_XDL_f16_gfx950_DECS -static constexpr size_t nInitParametersF16GemmGfx950 = 14; +static constexpr size_t nInitParametersF16GemmGfx950 = 28; static const StringRef initParametersF16GemmGfx950[nInitParametersF16GemmGfx950]; // END_GEMM_XDL_f16_gfx950_DECS @@ -961,7 +1108,7 @@ static const StringRef initParametersF16ConvGfx942[nInitParametersF16ConvGfx942] // END_CONV_XDL_f16_gfx942_DECS // BEGIN_CONV_XDL_f16_gfx950_DECS -static constexpr size_t nInitParametersF16ConvGfx950 = 26; +static constexpr size_t nInitParametersF16ConvGfx950 = 81; static const StringRef initParametersF16ConvGfx950[nInitParametersF16ConvGfx950]; // END_CONV_XDL_f16_gfx950_DECS @@ -991,7 +1138,7 @@ static const StringRef initParametersI8GemmGfx942[nInitParametersI8GemmGfx942]; // END_GEMM_XDL_i8_gfx942_DECS // BEGIN_GEMM_XDL_i8_gfx950_DECS -static constexpr size_t nInitParametersI8GemmGfx950 = 14; +static constexpr size_t nInitParametersI8GemmGfx950 = 39; static const StringRef initParametersI8GemmGfx950[nInitParametersI8GemmGfx950]; // END_GEMM_XDL_i8_gfx950_DECS @@ -1016,7 +1163,7 @@ static const StringRef initParametersI8ConvGfx942[nInitParametersI8ConvGfx942]; // END_CONV_XDL_i8_gfx942_DECS // BEGIN_CONV_XDL_i8_gfx950_DECS -static constexpr size_t nInitParametersI8ConvGfx950 = 21; +static constexpr size_t nInitParametersI8ConvGfx950 = 43; static const StringRef initParametersI8ConvGfx950[nInitParametersI8ConvGfx950]; // END_CONV_XDL_i8_gfx950_DECS From 6ae88d36681a045dcb75f243ed32b67dd848167f Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Fri, 23 Jan 2026 00:15:16 +0000 Subject: [PATCH 02/10] Update gfx942 quick-tune lists for gemm and conv. --- .../Rock/Tuning/QuickTuningPerfconfigs.inc | 345 +++++++++++++----- 1 file changed, 247 insertions(+), 98 deletions(-) diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc index 8a0f0f97eea0..db5791cb5acf 100644 --- a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc +++ b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc @@ -357,15 +357,23 @@ const StringRef PopulateParamsXDL::initParametersF32GemmGfx90a[] = { // BEGIN_GEMM_XDL_f32_gfx942_DEFS const StringRef PopulateParamsXDL::initParametersF32GemmGfx942[] = { - "v4:32,32,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:16,32,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:32,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:16,32,4,16,16,16,8,1,2,2,0,0,1,1", "v4:32,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,64,4,64,16,16,4,1,2,2,0,0,1,1", - "v4:32,64,8,16,32,16,4,1,2,2,0,0,1,1", - "v4:32,64,8,32,16,16,4,1,1,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,64,8,64,16,16,1,1,2,2,0,0,1,1", - "v4:256,128,4,128,64,16,4,1,2,2,0,0,1,1" + "v4:64,64,8,16,64,16,4,1,1,2,0,0,1,1", + "v4:128,128,4,64,64,16,4,1,2,2,0,0,1,1", + "v4:32,128,8,16,64,16,4,1,2,2,0,0,1,1", + "v4:64,64,8,32,64,16,4,1,2,2,0,0,1,1", + "v4:64,64,8,32,32,16,1,1,2,2,0,0,1,1", + "v4:128,64,4,128,16,16,8,1,1,2,0,0,1,1", + "v4:16,32,4,16,32,16,8,1,2,2,0,0,1,1", + "v4:256,256,4,64,128,16,4,1,2,2,0,0,1,1", + "v4:64,64,8,64,64,16,1,1,2,2,0,0,1,1", + "v4:96,128,8,96,32,32,4,1,2,2,0,0,1,1", + "v4:256,256,8,32,256,32,1,1,4,2,0,0,1,1", + "v4:32,256,8,32,16,16,4,1,1,2,0,0,1,1", + "v4:128,192,4,32,96,16,4,1,1,0,2,0,1,1" }; // END_GEMM_XDL_f32_gfx942_DEFS @@ -432,29 +440,66 @@ const StringRef PopulateParamsXDL::initParametersF32ConvGfx90a[] = { // BEGIN_CONV_XDL_f32_gfx942_DEFS const StringRef PopulateParamsXDL::initParametersF32ConvGfx942[] = { - "v4:16,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,64,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:32,16,8,16,16,16,8,1,2,2,0,0,1,1", "v4:32,32,8,16,16,16,4,1,2,2,0,0,1,1", - "v4:16,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,16,4,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,16,8,16,16,16,4,1,1,2,0,0,1,1", - "v4:64,32,8,16,32,16,4,1,1,2,0,0,1,1", - "v4:16,32,4,16,16,16,4,1,2,2,0,0,1,1", - "v4:32,64,4,16,32,16,4,1,2,2,0,0,1,1", + "v4:16,32,4,16,16,16,8,1,2,2,0,0,1,1", + "v4:16,16,4,16,16,16,8,1,2,2,0,0,1,1", + "v4:32,32,8,16,16,16,8,1,1,2,0,0,1,1", "v4:64,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,64,8,32,32,16,4,1,1,2,0,0,1,1", - "v4:64,16,4,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,32,4,16,16,16,4,1,1,2,0,0,1,1", - "v4:32,32,4,32,16,16,4,1,1,2,0,0,1,1", - "v4:32,64,8,32,16,16,1,1,1,2,0,0,1,1", - "v4:64,64,4,64,16,16,1,1,1,2,0,0,1,1", - "v4:32,64,8,32,16,16,1,1,2,2,0,0,1,1", - "v4:32,32,4,32,32,32,1,1,1,2,0,0,1,1", - "v4:32,64,8,16,32,16,8,1,1,2,0,0,1,1", - "v4:128,128,4,128,32,16,1,1,1,2,0,0,1,1" + "v4:64,32,8,32,16,16,4,1,1,2,0,0,1,1", + "v4:16,32,4,16,16,16,8,1,1,2,0,0,1,1", + "v4:64,16,8,16,16,16,4,1,4,2,0,0,1,1", + "v4:16,16,4,16,16,16,16,1,4,2,0,0,1,1", + "v4:16,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,16,4,16,16,16,4,1,2,2,0,0,1,1", + "v4:64,128,4,64,32,16,4,1,1,2,0,0,1,1", + "v4:32,64,8,16,32,16,4,1,1,2,0,0,1,1", + "v4:32,32,4,32,16,16,4,1,2,2,0,0,1,1", + "v4:16,64,4,16,32,16,4,1,2,2,0,0,1,1", + "v4:32,64,4,16,32,16,4,1,2,2,0,0,1,1", + "v4:64,64,4,32,32,16,4,1,2,2,0,0,1,1", + "v4:128,128,8,128,32,16,1,1,4,2,0,0,1,1", + "v4:128,128,4,128,16,16,4,1,1,2,0,0,1,1", + "v4:64,32,4,32,32,16,8,1,2,2,0,0,1,1", + "v4:128,64,4,64,32,16,4,1,4,2,0,0,1,1", + "v4:128,16,4,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,48,8,16,48,16,4,1,1,2,0,0,1,1", + "v4:256,64,4,128,16,16,4,1,2,2,0,0,1,1", + "v4:48,16,4,48,16,16,8,1,1,2,0,0,1,1", + "v4:64,64,8,64,32,16,1,1,4,2,0,0,1,1", + "v4:64,64,4,64,64,16,4,1,1,2,0,0,1,1", + "v4:32,128,4,32,128,16,1,1,4,2,0,0,1,1", + "v4:128,64,8,64,16,16,4,1,2,2,0,0,1,1", + "v4:192,64,4,48,32,16,4,1,2,2,0,0,1,1", + "v4:64,256,8,64,32,16,1,1,2,2,0,0,1,1", + "v4:16,32,4,16,32,16,16,1,2,2,0,0,1,1", + "v4:32,64,8,32,64,16,1,1,4,2,0,0,1,1", + "v4:64,16,8,16,16,16,8,1,3,2,0,0,1,1", + "v4:96,64,4,48,16,16,4,1,2,2,0,0,1,1", + "v4:128,128,4,128,16,16,8,1,2,2,0,0,1,1", + "v4:128,256,4,128,32,16,4,1,1,2,0,0,1,1", + "v4:256,128,4,64,64,16,4,1,2,2,0,0,1,1", + "v4:256,256,4,128,64,16,1,1,4,2,0,0,1,1", + "v4:32,256,4,32,256,16,1,1,4,2,0,0,1,1", + "v4:64,32,8,16,32,16,4,1,3,2,0,0,1,1", + "v4:128,96,4,32,48,16,4,1,1,2,0,0,1,1", + "v4:16,128,4,16,128,16,4,1,1,2,0,0,1,1", + "v4:256,128,8,64,32,16,1,1,2,2,0,0,1,1", + "v4:256,256,8,128,128,16,1,1,1,2,0,0,1,1", + "v4:64,256,4,64,16,16,4,1,2,2,0,0,1,1", + "v4:64,64,2,32,64,32,16,1,2,2,0,0,1,1", + "v4:96,64,8,48,32,16,4,1,1,2,0,0,1,1", + "v4:48,32,4,48,16,16,16,1,3,2,0,0,1,1", + "v4:64,256,4,16,64,16,8,1,4,2,0,0,1,1", + "v4:96,192,8,48,192,16,1,1,4,2,0,0,1,1", + "v4:96,64,4,48,16,16,8,1,2,0,2,64,1,1", + "v4:160,64,8,80,32,16,4,1,1,2,0,0,1,1", + "v4:192,64,4,192,16,16,4,1,2,2,0,0,1,1", + "v4:96,96,8,48,48,16,1,1,4,1,0,0,1,1", + "v4:192,256,4,96,32,16,8,1,1,1,8,8,1,1", + "v4:64,192,8,32,48,16,1,1,4,0,8,0,1,1", + "v4:64,96,4,32,48,16,4,1,1,0,8,64,1,1", + "v4:96,16,4,96,16,16,8,1,1,2,0,0,1,1" }; // END_CONV_XDL_f32_gfx942_DEFS @@ -569,27 +614,33 @@ const StringRef PopulateParamsXDL::initParametersF16GemmGfx90a[] = { // BEGIN_GEMM_XDL_f16_gfx942_DEFS const StringRef PopulateParamsXDL::initParametersF16GemmGfx942[] = { - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,64,8,16,32,16,8,1,1,2,0,0,1,1", - "v4:32,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,64,8,16,64,16,8,1,1,2,0,0,1,1", - "v4:32,32,8,32,16,16,8,1,2,2,0,0,1,1", - "v4:64,128,4,32,64,32,8,1,1,2,0,0,1,1", - "v4:64,32,8,32,32,32,8,1,1,2,0,0,1,1", - "v4:128,128,8,64,64,16,8,1,2,2,0,0,1,1", - "v4:256,128,4,128,64,16,8,1,1,2,0,0,1,1", - "v4:64,128,4,64,32,32,8,1,1,2,0,0,1,1", - "v4:128,128,2,64,64,32,8,1,1,2,0,0,1,1", - "v4:128,256,8,64,128,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,32,16,16,8,1,2,2,0,0,1,1", - "v4:64,16,4,16,16,16,4,1,2,2,0,0,1,1", - "v4:256,32,2,128,32,32,4,1,1,2,0,0,1,1", - "v4:16,32,4,16,16,16,4,1,1,2,0,0,1,1", - "v4:64,32,4,64,16,16,8,1,2,2,0,0,1,1", - "v4:128,256,8,64,128,32,4,1,2,2,0,0,1,1", - "v4:16,32,8,16,16,16,4,1,2,2,0,0,1,1" + "v4:32,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:16,32,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:32,64,8,16,32,16,8,1,2,2,0,0,1,1", + "v4:64,64,8,32,32,16,8,1,2,2,0,0,1,1", + "v4:16,64,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,32,8,32,32,32,8,1,2,2,0,0,1,1", + "v4:32,64,8,32,32,32,16,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,8,1,4,2,0,0,1,1", + "v4:128,64,8,64,32,32,8,1,2,2,0,0,1,1", + "v4:64,64,4,32,32,16,16,1,2,2,0,0,1,1", + "v4:128,128,8,32,64,16,8,1,2,2,0,0,1,1", + "v4:128,128,4,64,64,16,8,1,1,2,0,0,1,1", + "v4:64,64,8,64,64,16,8,1,1,2,0,0,1,1", + "v4:16,256,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:128,64,8,32,32,32,8,1,2,2,0,0,1,1", + "v4:256,256,8,32,128,16,8,1,1,2,0,0,1,1", + "v4:128,128,8,64,128,16,8,1,1,2,0,0,1,1", + "v4:128,256,8,64,128,32,4,1,1,2,0,0,1,1", + "v4:256,256,4,64,128,32,8,1,2,2,0,0,1,1", + "v4:128,128,8,32,32,32,8,1,2,2,0,0,1,1", + "v4:128,256,4,64,64,16,8,1,1,2,0,0,1,1", + "v4:64,32,4,64,32,16,16,1,3,2,0,0,1,1", + "v4:96,128,8,96,32,32,4,1,2,2,0,0,1,1", + "v4:192,256,8,192,32,16,4,1,2,2,0,0,1,1", + "v4:256,256,8,32,128,32,8,1,1,1,2,32,1,1", + "v4:128,128,8,64,64,32,4,1,1,0,1,64,1,1", + "v4:192,192,4,96,192,16,8,1,1,1,1,16,1,1" }; // END_GEMM_XDL_f16_gfx942_DEFS @@ -667,26 +718,93 @@ const StringRef PopulateParamsXDL::initParametersF16ConvGfx90a[] = { // BEGIN_CONV_XDL_f16_gfx942_DEFS const StringRef PopulateParamsXDL::initParametersF16ConvGfx942[] = { - "v4:16,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:32,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,16,8,32,16,16,8,1,2,2,0,0,1,1", "v4:64,64,8,32,32,32,8,1,2,2,0,0,1,1", - "v4:64,64,8,32,32,32,8,1,1,2,0,0,1,1", - "v4:16,64,8,16,16,16,4,1,1,2,0,0,1,1", - "v4:64,128,4,32,64,32,8,1,2,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,32,4,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,64,8,32,16,16,4,1,1,2,0,0,1,1", - "v4:128,256,4,128,64,32,8,1,2,2,0,0,1,1", - "v4:64,64,8,16,64,16,4,1,1,2,0,0,1,1", - "v4:16,64,4,16,16,16,4,1,1,2,0,0,1,1", - "v4:64,32,4,32,32,32,8,1,2,2,0,0,1,1", - "v4:32,128,8,32,32,32,1,1,2,2,0,0,1,1", - "v4:128,64,2,64,32,32,8,1,2,2,0,0,1,1", - "v4:64,32,4,16,32,16,4,1,1,2,0,0,1,1", + "v4:128,16,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:64,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:16,16,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,64,8,32,32,32,4,1,2,2,0,0,1,1", + "v4:16,32,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,16,8,32,16,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,64,32,16,4,1,2,2,0,0,1,1", "v4:32,256,2,32,64,32,4,1,2,2,0,0,1,1", - "v4:64,32,2,64,32,32,4,1,1,2,0,0,1,1" + "v4:16,128,4,16,32,16,8,1,2,2,0,0,1,1", + "v4:16,32,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:128,64,8,64,32,16,4,1,1,2,0,0,1,1", + "v4:16,16,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:128,128,2,128,64,32,4,1,2,2,0,0,1,1", + "v4:128,32,8,32,32,32,8,1,2,2,0,0,1,1", + "v4:32,64,8,32,16,16,4,1,2,2,0,0,1,1", + "v4:64,64,4,32,32,32,8,1,2,2,0,0,1,1", + "v4:32,32,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,16,64,16,8,1,2,2,0,0,1,1", + "v4:128,64,8,32,32,32,8,1,2,2,0,0,1,1", + "v4:128,16,8,64,16,16,8,1,1,2,0,0,1,1", + "v4:128,64,4,64,64,16,8,1,1,2,0,0,1,1", + "v4:64,32,2,64,32,32,16,1,1,2,0,0,1,1", + "v4:128,32,2,32,32,32,4,1,2,2,0,0,1,1", + "v4:128,32,4,128,16,16,4,1,2,2,0,0,1,1", + "v4:128,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:128,128,4,64,64,16,8,1,2,2,0,0,1,1", + "v4:48,32,8,48,16,16,4,1,2,2,0,0,1,1", + "v4:64,64,2,64,64,32,16,1,2,2,0,0,1,1", + "v4:64,128,4,64,128,16,4,1,1,2,0,0,1,1", + "v4:256,256,4,64,128,16,8,1,2,2,0,0,1,1", + "v4:64,256,4,64,64,32,4,1,1,2,0,0,1,1", + "v4:128,128,8,32,64,32,1,1,2,2,0,0,1,1", + "v4:32,128,4,32,128,16,4,1,1,2,0,0,1,1", + "v4:256,32,8,32,16,16,4,1,2,2,0,0,1,1", + "v4:32,256,4,32,32,16,8,1,1,2,0,0,1,1", + "v4:128,256,4,128,32,16,8,1,1,2,0,0,1,1", + "v4:128,256,8,128,32,16,4,1,2,2,0,0,1,1", + "v4:64,64,4,16,16,16,4,1,2,2,0,0,1,1", + "v4:128,256,4,128,64,16,8,1,1,2,0,0,1,1", + "v4:256,128,4,64,64,32,8,1,2,2,0,0,1,1", + "v4:256,128,8,128,32,16,4,1,1,2,0,0,1,1", + "v4:256,64,4,64,64,16,8,1,2,2,0,0,1,1", + "v4:32,64,4,16,16,16,4,1,2,2,0,0,1,1", + "v4:128,128,2,128,128,32,8,1,2,2,0,0,1,1", + "v4:128,16,8,16,16,16,4,1,1,2,0,0,1,1", + "v4:128,256,8,128,128,32,1,1,2,2,0,0,1,1", + "v4:128,64,8,64,64,16,8,1,1,2,0,0,1,1", + "v4:64,256,8,16,128,16,4,1,1,2,0,0,1,1", + "v4:32,128,8,32,32,32,4,1,3,2,0,0,1,1", + "v4:64,128,8,64,128,16,4,1,1,2,0,0,1,1", + "v4:128,16,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:128,256,8,64,128,16,4,1,1,2,0,0,1,1", + "v4:192,32,4,48,32,16,16,1,1,2,0,0,1,1", + "v4:256,64,4,256,64,16,4,1,1,2,0,0,1,1", + "v4:96,16,8,48,16,16,16,1,2,2,0,0,1,1", + "v4:256,256,4,128,64,16,4,1,2,2,0,0,1,1", + "v4:256,256,4,64,64,32,4,1,1,2,0,0,1,1", + "v4:32,64,4,32,64,32,16,1,3,2,0,0,1,1", + "v4:128,64,4,128,16,16,16,1,1,2,0,0,1,1", + "v4:256,64,2,256,64,32,16,1,1,2,0,0,1,1", + "v4:128,256,2,128,64,32,8,1,4,2,0,0,1,1", + "v4:256,32,4,64,32,16,8,1,1,2,0,0,1,1", + "v4:128,256,2,32,256,32,16,1,1,2,0,0,1,1", + "v4:256,128,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:16,16,4,16,16,16,8,1,2,0,0,4,1,1", + "v4:16,16,8,16,16,16,8,1,2,1,0,4,1,1", + "v4:160,64,4,160,32,32,8,1,2,2,0,0,1,1", + "v4:256,48,8,16,48,16,8,1,1,2,0,0,1,1", + "v4:160,64,8,80,32,16,8,1,1,0,0,4,1,1", + "v4:96,128,8,48,32,16,8,1,2,1,0,8,1,1", + "v4:96,64,4,48,16,16,16,1,2,0,8,4,1,1", + "v4:128,128,4,128,32,16,8,1,2,0,8,4,1,1", + "v4:128,256,2,32,256,32,4,1,1,0,0,64,1,1", + "v4:128,256,4,32,256,32,8,1,3,2,0,0,1,1", + "v4:192,32,8,48,32,16,8,1,1,0,8,16,1,1", + "v4:224,32,4,224,32,32,16,1,3,2,0,0,1,1", + "v4:256,256,2,32,128,32,16,1,2,2,0,0,1,1", + "v4:256,256,4,128,64,32,8,1,2,0,2,8,1,1", + "v4:32,64,8,32,32,16,16,1,3,2,0,0,1,1", + "v4:32,64,8,32,64,32,16,1,4,2,0,0,1,1", + "v4:64,192,4,16,192,16,8,1,2,2,0,0,1,1", + "v4:64,64,4,16,16,16,32,1,3,2,0,0,1,1" }; // END_CONV_XDL_f16_gfx942_DEFS @@ -858,19 +976,28 @@ const StringRef PopulateParamsXDL::initParametersI8GemmGfx90a[] = { // BEGIN_GEMM_XDL_i8_gfx942_DEFS const StringRef PopulateParamsXDL::initParametersI8GemmGfx942[] = { - "v4:64,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:64,64,16,32,32,32,8,1,1,2,0,0,1,1", - "v4:128,128,8,32,128,16,16,1,1,2,0,0,1,1", - "v4:128,16,4,32,16,16,16,1,1,2,0,0,1,1", - "v4:16,16,16,16,16,16,16,1,1,2,0,0,1,1", - "v4:64,16,32,16,16,16,16,1,1,2,0,0,1,1", - "v4:16,64,4,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,128,16,16,32,16,16,1,1,2,0,0,1,1", - "v4:32,64,8,32,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:256,128,8,64,128,16,8,1,1,2,0,0,1,1", - "v4:128,32,16,32,32,32,8,1,2,2,0,0,1,1", - "v4:64,16,32,16,16,16,8,1,2,2,0,0,1,1" + "v4:32,32,32,16,16,16,16,1,2,2,0,0,1,1", + "v4:16,16,32,16,16,16,16,1,1,2,0,0,1,1", + "v4:64,64,8,32,32,16,16,1,1,2,0,0,1,1", + "v4:32,64,32,16,16,16,16,1,1,2,0,0,1,1", + "v4:32,128,8,32,32,16,16,1,2,2,0,0,1,1", + "v4:64,16,8,32,16,16,16,1,2,2,0,0,1,1", + "v4:128,128,8,32,64,32,16,1,1,2,0,0,1,1", + "v4:64,64,32,16,32,16,16,1,1,2,0,0,1,1", + "v4:128,256,8,64,128,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,32,64,32,16,1,1,2,0,0,1,1", + "v4:128,128,16,128,16,16,8,1,1,2,0,0,1,1", + "v4:256,128,8,64,64,16,16,1,1,2,0,0,1,1", + "v4:128,64,16,32,32,16,16,1,1,2,0,0,1,1", + "v4:128,64,4,32,32,32,8,1,3,2,0,0,1,1", + "v4:128,256,8,16,256,16,8,1,2,2,0,0,1,1", + "v4:96,128,16,96,32,16,8,1,2,1,0,32,1,1", + "v4:256,256,16,128,32,32,8,1,2,2,0,0,1,1", + "v4:128,64,8,32,64,16,16,1,1,1,0,32,1,1", + "v4:192,128,8,96,64,16,16,1,1,0,1,8,1,1", + "v4:192,256,16,96,64,16,8,1,1,0,2,0,1,1", + "v4:192,64,8,96,32,16,16,1,1,0,2,0,1,1", + "v4:64,64,16,32,32,16,16,1,2,1,4,4,1,1" }; // END_GEMM_XDL_i8_gfx942_DEFS @@ -966,17 +1093,39 @@ const StringRef PopulateParamsXDL::initParametersI8ConvGfx90a[] = { // BEGIN_CONV_XDL_i8_gfx942_DEFS const StringRef PopulateParamsXDL::initParametersI8ConvGfx942[] = { - "v4:16,16,16,16,16,16,16,1,1,2,0,0,1,1", - "v4:16,32,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,32,16,16,16,16,16,1,2,2,0,0,1,1", - "v4:32,32,32,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,16,16,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,16,32,16,16,16,1,1,2,2,0,0,1,1", - "v4:64,16,32,16,16,16,16,1,2,2,0,0,1,1", - "v4:16,32,32,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,64,16,64,32,32,1,1,2,2,0,0,1,1", - "v4:128,128,16,128,32,32,1,1,2,2,0,0,1,1", - "v4:32,64,32,32,16,16,1,1,2,2,0,0,1,1" + "v4:32,16,16,16,16,16,8,1,2,2,0,0,1,1", + "v4:32,16,16,32,16,16,16,1,2,2,0,0,1,1", + "v4:64,64,8,64,32,32,8,1,1,2,0,0,1,1", + "v4:128,128,8,64,64,16,8,1,1,2,0,0,1,1", + "v4:64,16,32,64,16,16,1,1,4,2,0,0,1,1", + "v4:128,64,8,32,64,32,16,1,1,2,0,0,1,1", + "v4:64,64,8,32,32,16,8,1,2,2,0,0,1,1", + "v4:32,64,16,32,32,32,1,1,1,2,0,0,1,1", + "v4:128,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:128,32,8,32,32,16,16,1,1,2,0,0,1,1", + "v4:256,32,4,128,32,32,16,1,2,2,0,0,1,1", + "v4:256,64,4,256,32,16,16,1,1,2,0,0,1,1", + "v4:128,128,32,64,64,32,1,1,4,2,0,0,1,1", + "v4:128,64,8,32,64,32,8,1,2,2,0,0,1,1", + "v4:256,64,4,64,64,32,16,1,2,2,0,0,1,1", + "v4:64,128,8,16,64,16,16,1,2,2,0,0,1,1", + "v4:128,256,4,64,256,16,8,1,1,2,0,0,1,1", + "v4:128,32,8,64,32,16,8,1,3,2,0,0,1,1", + "v4:128,128,8,64,32,16,16,1,2,2,0,0,1,1", + "v4:256,128,4,128,32,32,16,1,1,2,0,0,1,1", + "v4:256,64,32,256,32,32,1,1,2,2,0,0,1,1", + "v4:128,256,4,128,128,16,8,1,1,2,0,0,1,1", + "v4:256,32,16,64,32,32,1,1,2,2,0,0,1,1", + "v4:128,64,4,32,32,16,16,1,1,2,0,0,1,1", + "v4:256,256,4,256,16,16,16,1,4,2,0,0,1,1", + "v4:256,64,32,64,32,32,1,1,2,2,0,0,1,1", + "v4:256,128,32,256,32,16,1,1,3,2,0,0,1,1", + "v4:64,192,4,32,96,16,8,1,2,2,0,0,1,1", + "v4:128,32,16,32,16,16,16,1,2,2,0,0,1,1", + "v4:256,16,8,16,16,16,8,1,2,0,0,64,1,1", + "v4:256,256,8,32,128,32,16,1,4,2,0,0,1,1", + "v4:256,80,8,16,80,16,8,1,2,0,2,64,1,1", + "v4:64,16,32,64,16,16,8,1,3,2,0,0,1,1" }; // END_CONV_XDL_i8_gfx942_DEFS @@ -1043,7 +1192,7 @@ static const StringRef initParametersF32GemmGfx90a[nInitParametersF32GemmGfx90a] // END_GEMM_XDL_f32_gfx90a_DECS // BEGIN_GEMM_XDL_f32_gfx942_DECS -static constexpr size_t nInitParametersF32GemmGfx942 = 9; +static constexpr size_t nInitParametersF32GemmGfx942 = 17; static const StringRef initParametersF32GemmGfx942[nInitParametersF32GemmGfx942]; // END_GEMM_XDL_f32_gfx942_DECS @@ -1063,7 +1212,7 @@ static const StringRef initParametersF32ConvGfx90a[nInitParametersF32ConvGfx90a] // END_CONV_XDL_f32_gfx90a_DECS // BEGIN_CONV_XDL_f32_gfx942_DECS -static constexpr size_t nInitParametersF32ConvGfx942 = 23; +static constexpr size_t nInitParametersF32ConvGfx942 = 60; static const StringRef initParametersF32ConvGfx942[nInitParametersF32ConvGfx942]; // END_CONV_XDL_f32_gfx942_DECS @@ -1083,7 +1232,7 @@ static const StringRef initParametersF16GemmGfx90a[nInitParametersF16GemmGfx90a] // END_GEMM_XDL_f16_gfx90a_DECS // BEGIN_GEMM_XDL_f16_gfx942_DECS -static constexpr size_t nInitParametersF16GemmGfx942 = 21; +static constexpr size_t nInitParametersF16GemmGfx942 = 27; static const StringRef initParametersF16GemmGfx942[nInitParametersF16GemmGfx942]; // END_GEMM_XDL_f16_gfx942_DECS @@ -1103,7 +1252,7 @@ static const StringRef initParametersF16ConvGfx90a[nInitParametersF16ConvGfx90a] // END_CONV_XDL_f16_gfx90a_DECS // BEGIN_CONV_XDL_f16_gfx942_DECS -static constexpr size_t nInitParametersF16ConvGfx942 = 20; +static constexpr size_t nInitParametersF16ConvGfx942 = 87; static const StringRef initParametersF16ConvGfx942[nInitParametersF16ConvGfx942]; // END_CONV_XDL_f16_gfx942_DECS @@ -1133,7 +1282,7 @@ static const StringRef initParametersI8GemmGfx90a[nInitParametersI8GemmGfx90a]; // END_GEMM_XDL_i8_gfx90a_DECS // BEGIN_GEMM_XDL_i8_gfx942_DECS -static constexpr size_t nInitParametersI8GemmGfx942 = 13; +static constexpr size_t nInitParametersI8GemmGfx942 = 22; static const StringRef initParametersI8GemmGfx942[nInitParametersI8GemmGfx942]; // END_GEMM_XDL_i8_gfx942_DECS @@ -1158,7 +1307,7 @@ static const StringRef initParametersI8ConvGfx90a[nInitParametersI8ConvGfx90a]; // END_CONV_XDL_i8_gfx90a_DECS // BEGIN_CONV_XDL_i8_gfx942_DECS -static constexpr size_t nInitParametersI8ConvGfx942 = 11; +static constexpr size_t nInitParametersI8ConvGfx942 = 33; static const StringRef initParametersI8ConvGfx942[nInitParametersI8ConvGfx942]; // END_CONV_XDL_i8_gfx942_DECS From f6a4b914466b802e4ffe8fc1c9f3fa6e69ab47bb Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Fri, 23 Jan 2026 00:22:29 +0000 Subject: [PATCH 03/10] Update gfx90a quick-tune lists for gemm and conv. --- .../Rock/Tuning/QuickTuningPerfconfigs.inc | 280 +++++++++++++----- 1 file changed, 207 insertions(+), 73 deletions(-) diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc index db5791cb5acf..0fd93f3beec6 100644 --- a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc +++ b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc @@ -345,13 +345,19 @@ const StringRef PopulateParamsXDL::initParametersF32GemmGfx908[] = { // BEGIN_GEMM_XDL_f32_gfx90a_DEFS const StringRef PopulateParamsXDL::initParametersF32GemmGfx90a[] = { - "v4:32,64,8,16,32,16,4,1,1,2,0,0,1,1", - "v4:32,32,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:32,64,8,32,16,16,4,1,1,2,0,0,1,1", + "v4:16,32,8,16,16,16,8,1,2,2,0,0,1,1", "v4:64,64,4,32,32,32,8,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,4,1,2,2,0,0,1,1", - "v4:64,32,4,16,32,16,8,1,2,2,0,0,1,1", - "v4:128,64,4,128,16,16,4,1,1,2,0,0,1,1", - "v4:16,32,8,16,16,16,8,1,2,2,0,0,1,1" + "v4:32,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:128,128,4,128,32,16,4,1,2,2,0,0,1,1", + "v4:64,128,2,32,32,32,8,1,1,2,0,0,1,1", + "v4:32,64,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:64,64,8,32,32,16,1,1,2,2,0,0,1,1", + "v4:64,64,8,32,32,32,4,1,2,2,0,0,1,1", + "v4:64,64,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:32,64,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:32,64,8,32,32,32,4,1,2,2,0,0,1,1", + "v4:128,32,4,16,16,16,8,1,2,2,0,0,1,1" }; // END_GEMM_XDL_f32_gfx90a_DEFS @@ -417,24 +423,53 @@ const StringRef PopulateParamsXDL::initParametersF32ConvGfx908[] = { // BEGIN_CONV_XDL_f32_gfx90a_DEFS const StringRef PopulateParamsXDL::initParametersF32ConvGfx90a[] = { - "v4:32,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,32,4,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,4,1,2,2,0,0,1,1", "v4:64,64,4,32,32,32,4,1,2,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,128,2,64,32,32,4,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,128,4,32,32,32,4,1,2,2,0,0,1,1", + "v4:64,128,4,64,32,16,4,1,1,2,0,0,1,1", + "v4:96,64,4,48,16,16,4,1,2,2,0,0,1,1", + "v4:32,32,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:64,64,8,32,32,32,4,1,1,2,0,0,1,1", + "v4:32,64,8,32,16,16,4,1,1,2,0,0,1,1", + "v4:64,256,2,32,64,32,4,1,2,2,0,0,1,1", + "v4:64,64,4,32,32,16,4,1,2,2,0,0,1,1", "v4:64,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,64,8,64,16,16,4,1,1,2,0,0,1,1", - "v4:64,32,8,16,32,16,4,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,64,8,16,16,16,4,1,1,2,0,0,1,1", - "v4:32,64,4,32,16,16,4,1,1,2,0,0,1,1", - "v4:64,64,2,64,32,32,1,1,2,2,0,0,1,1", - "v4:32,16,4,32,16,16,1,1,1,2,0,0,1,1", - "v4:128,256,8,128,64,32,1,1,1,2,0,0,1,1", - "v4:32,64,2,32,32,32,4,1,1,2,0,0,1,1", - "v4:32,32,4,32,16,16,1,1,2,2,0,0,1,1" + "v4:96,128,2,96,32,32,4,1,2,2,0,0,1,1", + "v4:32,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,64,8,16,32,16,4,1,2,2,0,0,1,1", + "v4:16,32,4,16,16,16,8,1,2,2,0,0,1,1", + "v4:32,32,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:32,64,8,16,32,16,4,1,2,2,0,0,1,1", + "v4:128,128,4,32,32,32,4,1,2,2,0,0,1,1", + "v4:64,32,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:64,32,4,32,16,16,8,1,1,2,0,0,1,1", + "v4:32,64,4,16,32,16,4,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:128,256,2,128,64,32,4,1,1,2,0,0,1,1", + "v4:128,32,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:64,256,4,64,64,16,4,1,1,2,0,0,1,1", + "v4:16,64,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:16,32,4,16,32,16,4,1,1,2,0,0,1,1", + "v4:16,64,4,16,64,16,4,1,1,2,0,0,1,1", + "v4:32,128,4,16,32,16,4,1,2,2,0,0,1,1", + "v4:256,256,2,256,32,32,1,1,2,2,0,0,1,1", + "v4:128,128,4,32,32,16,4,1,2,2,0,0,1,1", + "v4:64,32,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:32,256,2,32,64,32,4,1,1,2,0,0,1,1", + "v4:32,64,2,32,64,32,4,1,1,2,0,0,1,1", + "v4:256,128,4,128,32,16,1,1,2,2,0,0,1,1", + "v4:32,256,4,32,128,16,1,1,1,2,0,0,1,1", + "v4:64,128,2,64,64,32,4,1,2,2,0,0,1,1", + "v4:256,256,8,256,64,16,1,1,1,2,0,0,1,1", + "v4:16,128,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:64,256,4,32,64,32,4,1,2,2,0,0,1,1", + "v4:192,32,4,48,16,16,8,1,2,2,0,0,1,1", + "v4:32,128,4,16,16,16,8,1,1,2,0,0,1,1", + "v4:256,128,4,128,64,16,4,1,1,2,0,0,1,1", + "v4:32,64,4,32,16,16,16,1,2,2,0,0,1,1", + "v4:16,16,4,16,16,16,8,1,2,0,8,32,1,1", + "v4:16,64,4,16,16,16,4,1,2,1,2,32,1,1", + "v4:96,128,4,96,32,16,4,1,1,0,1,64,1,1", + "v4:192,128,4,96,32,32,4,1,2,1,0,8,1,1" }; // END_CONV_XDL_f32_gfx90a_DEFS @@ -592,23 +627,30 @@ const StringRef PopulateParamsXDL::initParametersF16GemmGfx908[] = { // BEGIN_GEMM_XDL_f16_gfx90a_DEFS const StringRef PopulateParamsXDL::initParametersF16GemmGfx90a[] = { + "v4:16,64,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,64,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,16,1,2,2,0,0,1,1", "v4:64,64,8,32,32,32,8,1,1,2,0,0,1,1", - "v4:32,64,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,64,32,32,4,1,1,2,0,0,1,1", "v4:64,64,8,32,32,32,8,1,2,2,0,0,1,1", - "v4:32,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:128,128,4,64,64,32,8,1,1,2,0,0,1,1", - "v4:64,128,8,64,32,16,4,1,1,2,0,0,1,1", - "v4:16,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,16,8,16,16,16,4,1,1,2,0,0,1,1", - "v4:32,32,8,16,16,16,4,1,1,2,0,0,1,1", - "v4:32,128,4,32,32,16,8,1,1,2,0,0,1,1", - "v4:16,64,4,16,16,16,8,1,2,2,0,0,1,1", - "v4:128,128,8,64,64,32,8,1,2,2,0,0,1,1", - "v4:128,256,8,128,64,16,4,1,2,2,0,0,1,1", - "v4:64,256,4,64,64,16,4,1,1,2,0,0,1,1", - "v4:256,128,4,128,64,16,8,1,1,2,0,0,1,1" + "v4:128,128,8,128,32,16,4,1,1,2,0,0,1,1", + "v4:32,64,8,16,32,16,16,1,1,2,0,0,1,1", + "v4:32,32,8,32,16,16,16,1,2,2,0,0,1,1", + "v4:128,128,8,64,32,32,8,1,1,2,0,0,1,1", + "v4:128,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,16,64,16,8,1,2,2,0,0,1,1", + "v4:32,64,8,32,32,32,8,1,2,2,0,0,1,1", + "v4:128,128,8,32,32,32,16,1,1,2,0,0,1,1", + "v4:128,32,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:256,256,8,64,64,16,8,1,1,2,0,0,1,1", + "v4:128,256,8,128,16,16,8,1,1,2,0,0,1,1", + "v4:128,64,8,64,64,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,64,32,32,4,1,1,1,8,16,1,1", + "v4:80,128,8,80,16,16,8,1,2,1,0,64,1,1", + "v4:256,256,4,256,64,16,4,1,1,0,0,0,1,1", + "v4:32,256,8,32,32,16,4,1,1,0,0,0,1,1", + "v4:64,256,8,64,32,32,4,1,1,0,2,16,1,1", + "v4:96,256,8,96,32,16,4,1,2,2,0,0,1,1" }; // END_GEMM_XDL_f16_gfx90a_DEFS @@ -698,21 +740,77 @@ const StringRef PopulateParamsXDL::initParametersF16ConvGfx908[] = { // BEGIN_CONV_XDL_f16_gfx90a_DEFS const StringRef PopulateParamsXDL::initParametersF16ConvGfx90a[] = { - "v4:32,32,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,32,32,32,8,1,1,2,0,0,1,1", "v4:64,64,8,32,32,32,8,1,1,2,0,0,1,1", - "v4:64,64,8,32,32,32,8,1,2,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,32,8,32,32,32,8,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:128,128,4,128,32,32,8,1,2,2,0,0,1,1", - "v4:64,64,2,32,32,32,8,1,1,2,0,0,1,1", - "v4:64,32,8,32,32,32,8,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,64,4,16,16,16,4,1,1,2,0,0,1,1", - "v4:32,128,8,32,32,32,4,1,1,2,0,0,1,1", - "v4:16,128,4,16,32,16,4,1,1,2,0,0,1,1", + "v4:64,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:128,256,2,128,64,32,4,1,1,2,0,0,1,1", + "v4:64,128,8,64,32,32,4,1,1,2,0,0,1,1", "v4:64,256,2,64,64,32,4,1,2,2,0,0,1,1", - "v4:128,64,2,128,32,32,4,1,1,2,0,0,1,1" + "v4:128,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:128,128,4,128,32,16,8,1,1,2,0,0,1,1", + "v4:64,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:32,256,2,32,64,32,4,1,2,2,0,0,1,1", + "v4:64,256,8,32,64,32,4,1,1,2,0,0,1,1", + "v4:32,32,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:128,256,2,64,128,32,4,1,2,2,0,0,1,1", + "v4:32,32,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:128,64,4,32,64,16,8,1,1,2,0,0,1,1", + "v4:128,16,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:128,256,8,128,32,32,4,1,1,2,0,0,1,1", + "v4:16,32,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:16,32,4,16,32,16,8,1,1,2,0,0,1,1", + "v4:64,32,4,32,16,16,16,1,2,2,0,0,1,1", + "v4:64,64,4,32,32,32,8,1,2,2,0,0,1,1", + "v4:128,16,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:64,32,8,32,16,16,16,1,1,2,0,0,1,1", + "v4:32,64,4,32,32,32,8,1,2,2,0,0,1,1", + "v4:256,128,8,128,32,16,4,1,1,2,0,0,1,1", + "v4:128,128,4,64,64,32,8,1,1,2,0,0,1,1", + "v4:128,64,4,32,64,32,8,1,2,2,0,0,1,1", + "v4:32,16,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,16,4,32,16,16,8,1,1,2,0,0,1,1", + "v4:128,128,2,32,32,32,4,1,2,2,0,0,1,1", + "v4:128,32,8,32,32,32,8,1,1,2,0,0,1,1", + "v4:128,32,8,32,32,16,8,1,2,2,0,0,1,1", + "v4:16,32,4,16,16,16,8,1,2,2,0,0,1,1", + "v4:256,64,4,128,32,32,8,1,1,2,0,0,1,1", + "v4:128,256,4,128,64,32,4,1,1,2,0,0,1,1", + "v4:64,256,4,32,64,16,8,1,1,2,0,0,1,1", + "v4:16,32,4,16,32,16,16,1,1,2,0,0,1,1", + "v4:32,32,8,16,16,16,4,1,1,2,0,0,1,1", + "v4:64,64,2,32,64,32,4,1,2,2,0,0,1,1", + "v4:64,64,4,16,16,16,4,1,2,2,0,0,1,1", + "v4:32,64,4,16,32,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,32,32,32,4,1,2,2,0,0,1,1", + "v4:128,256,4,64,128,32,8,1,1,2,0,0,1,1", + "v4:128,64,8,32,32,16,8,1,1,2,0,0,1,1", + "v4:128,128,8,128,16,16,4,1,2,2,0,0,1,1", + "v4:64,64,4,32,64,16,8,1,1,2,0,0,1,1", + "v4:128,64,8,32,32,32,8,1,1,2,0,0,1,1", + "v4:128,128,8,32,64,16,4,1,1,2,0,0,1,1", + "v4:128,64,4,64,64,32,8,1,2,2,0,0,1,1", + "v4:128,256,4,32,128,32,8,1,1,2,0,0,1,1", + "v4:128,256,4,64,128,16,4,1,1,2,0,0,1,1", + "v4:192,32,4,48,32,16,16,1,1,2,0,0,1,1", + "v4:256,128,4,32,64,32,16,1,1,2,0,0,1,1", + "v4:128,32,4,16,32,16,16,1,2,2,0,0,1,1", + "v4:128,32,8,32,32,32,4,1,2,2,0,0,1,1", + "v4:256,32,4,16,32,16,8,1,2,2,0,0,1,1", + "v4:256,256,4,64,64,32,16,1,1,2,0,0,1,1", + "v4:128,64,4,32,64,32,16,1,2,2,0,0,1,1", + "v4:256,128,8,64,128,32,4,1,2,2,0,0,1,1", + "v4:64,128,8,16,64,16,4,1,1,2,0,0,1,1", + "v4:64,32,8,64,32,32,4,1,2,2,0,0,1,1", + "v4:128,128,4,32,128,32,16,1,1,2,0,0,1,1", + "v4:96,32,4,96,16,16,16,1,2,2,0,0,1,1", + "v4:64,256,4,64,16,16,16,1,1,2,0,0,1,1", + "v4:64,32,4,64,32,16,16,1,1,2,0,0,1,1", + "v4:128,256,4,32,256,16,4,1,1,2,0,0,1,1", + "v4:192,64,8,96,32,16,4,1,2,2,0,0,1,1", + "v4:256,48,8,16,48,16,8,1,1,1,1,4,1,1", + "v4:256,128,8,128,32,32,4,1,1,1,8,4,1,1", + "v4:256,192,8,64,96,32,8,1,1,0,1,64,1,1", + "v4:96,64,8,96,32,32,4,1,1,1,0,64,1,1" }; // END_CONV_XDL_f16_gfx90a_DEFS @@ -963,14 +1061,31 @@ const StringRef PopulateParamsXDL::initParametersI8GemmGfx908[] = { // BEGIN_GEMM_XDL_i8_gfx90a_DEFS const StringRef PopulateParamsXDL::initParametersI8GemmGfx90a[] = { - "v4:64,64,8,32,32,16,16,1,1,2,0,0,1,1", - "v4:32,64,8,32,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,16,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,16,16,16,16,16,1,2,2,0,0,1,1", - "v4:64,256,8,64,64,16,4,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,128,4,32,32,16,16,1,2,2,0,0,1,1", - "v4:128,64,8,32,64,32,8,1,1,2,0,0,1,1" + "v4:32,32,16,16,16,16,16,1,2,2,0,0,1,1", + "v4:16,16,16,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,128,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:32,64,32,16,16,16,8,1,1,2,0,0,1,1", + "v4:64,64,8,16,64,16,16,1,1,2,0,0,1,1", + "v4:64,128,16,64,32,16,4,1,1,2,0,0,1,1", + "v4:128,128,16,128,16,16,8,1,1,2,0,0,1,1", + "v4:128,128,16,64,64,16,4,1,1,2,0,0,1,1", + "v4:16,128,16,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,64,32,16,16,16,16,1,1,2,0,0,1,1", + "v4:96,128,16,96,32,32,4,1,2,2,0,0,1,1", + "v4:256,16,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:16,64,32,16,32,16,1,1,2,2,0,0,1,1", + "v4:32,128,32,32,32,32,1,1,2,2,0,0,1,1", + "v4:128,128,4,32,32,16,4,1,2,2,0,0,1,1", + "v4:128,32,16,32,16,16,8,1,2,2,0,0,1,1", + "v4:32,256,16,32,32,32,4,1,1,2,0,0,1,1", + "v4:128,128,16,32,64,16,8,1,2,2,0,0,1,1", + "v4:16,16,32,16,16,16,16,1,1,0,4,0,1,1", + "v4:64,64,32,32,64,32,1,1,1,2,0,0,1,1", + "v4:128,128,32,32,32,16,8,1,2,0,0,4,1,1", + "v4:128,16,32,32,16,16,8,1,1,2,0,0,1,1", + "v4:128,32,16,128,32,32,16,1,1,2,0,0,1,1", + "v4:64,128,32,16,32,16,8,1,1,0,2,32,1,1", + "v4:80,128,8,80,16,16,16,1,2,0,4,8,1,1" }; // END_GEMM_XDL_i8_gfx90a_DEFS @@ -1079,15 +1194,34 @@ const StringRef PopulateParamsXDL::initParametersI8ConvGfx908[] = { // BEGIN_CONV_XDL_i8_gfx90a_DEFS const StringRef PopulateParamsXDL::initParametersI8ConvGfx90a[] = { - "v4:32,32,16,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,32,8,16,16,16,16,1,2,2,0,0,1,1", - "v4:32,16,16,16,16,16,16,1,2,2,0,0,1,1", - "v4:64,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,64,8,32,32,32,1,1,1,2,0,0,1,1", - "v4:64,32,32,32,16,16,8,1,2,2,0,0,1,1", - "v4:16,256,4,16,64,16,4,1,1,2,0,0,1,1", - "v4:32,64,16,32,16,16,1,1,1,2,0,0,1,1" + "v4:32,32,32,16,16,16,8,1,1,2,0,0,1,1", + "v4:128,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:32,64,32,16,16,16,8,1,2,2,0,0,1,1", + "v4:128,16,16,16,16,16,16,1,1,2,0,0,1,1", + "v4:64,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,64,8,32,32,16,16,1,1,2,0,0,1,1", + "v4:128,32,8,32,32,32,16,1,1,2,0,0,1,1", + "v4:64,256,4,64,64,32,4,1,1,2,0,0,1,1", + "v4:64,16,32,16,16,16,16,1,1,2,0,0,1,1", + "v4:128,128,32,64,32,16,4,1,1,2,0,0,1,1", + "v4:128,128,8,128,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:256,64,8,128,16,16,8,1,1,2,0,0,1,1", + "v4:32,128,32,16,32,16,4,1,1,2,0,0,1,1", + "v4:128,32,16,32,32,32,4,1,2,2,0,0,1,1", + "v4:128,64,4,32,32,16,4,1,1,2,0,0,1,1", + "v4:128,64,4,32,64,32,16,1,1,2,0,0,1,1", + "v4:128,128,4,64,16,16,8,1,2,2,0,0,1,1", + "v4:256,128,8,64,32,32,8,1,2,2,0,0,1,1", + "v4:128,128,8,64,16,16,16,1,2,2,0,0,1,1", + "v4:256,128,8,64,64,32,16,1,1,2,0,0,1,1", + "v4:128,64,4,64,64,32,16,1,1,0,2,4,1,1", + "v4:256,128,4,256,32,32,16,1,1,2,0,0,1,1", + "v4:256,128,8,64,32,32,16,1,1,1,8,4,1,1", + "v4:32,128,8,32,128,32,4,1,2,2,0,0,1,1", + "v4:64,128,4,64,128,32,4,1,1,0,0,8,1,1", + "v4:64,16,16,16,16,16,4,1,2,0,1,8,1,1", + "v4:64,64,32,32,64,16,4,1,2,2,0,0,1,1" }; // END_CONV_XDL_i8_gfx90a_DEFS @@ -1187,7 +1321,7 @@ static const StringRef initParametersF32GemmGfx908[nInitParametersF32GemmGfx908] // END_GEMM_XDL_f32_gfx908_DECS // BEGIN_GEMM_XDL_f32_gfx90a_DECS -static constexpr size_t nInitParametersF32GemmGfx90a = 7; +static constexpr size_t nInitParametersF32GemmGfx90a = 13; static const StringRef initParametersF32GemmGfx90a[nInitParametersF32GemmGfx90a]; // END_GEMM_XDL_f32_gfx90a_DECS @@ -1207,7 +1341,7 @@ static const StringRef initParametersF32ConvGfx908[nInitParametersF32ConvGfx908] // END_CONV_XDL_f32_gfx908_DECS // BEGIN_CONV_XDL_f32_gfx90a_DECS -static constexpr size_t nInitParametersF32ConvGfx90a = 18; +static constexpr size_t nInitParametersF32ConvGfx90a = 47; static const StringRef initParametersF32ConvGfx90a[nInitParametersF32ConvGfx90a]; // END_CONV_XDL_f32_gfx90a_DECS @@ -1227,7 +1361,7 @@ static const StringRef initParametersF16GemmGfx908[nInitParametersF16GemmGfx908] // END_GEMM_XDL_f16_gfx908_DECS // BEGIN_GEMM_XDL_f16_gfx90a_DECS -static constexpr size_t nInitParametersF16GemmGfx90a = 17; +static constexpr size_t nInitParametersF16GemmGfx90a = 24; static const StringRef initParametersF16GemmGfx90a[nInitParametersF16GemmGfx90a]; // END_GEMM_XDL_f16_gfx90a_DECS @@ -1247,7 +1381,7 @@ static const StringRef initParametersF16ConvGfx908[nInitParametersF16ConvGfx908] // END_CONV_XDL_f16_gfx908_DECS // BEGIN_CONV_XDL_f16_gfx90a_DECS -static constexpr size_t nInitParametersF16ConvGfx90a = 16; +static constexpr size_t nInitParametersF16ConvGfx90a = 71; static const StringRef initParametersF16ConvGfx90a[nInitParametersF16ConvGfx90a]; // END_CONV_XDL_f16_gfx90a_DECS @@ -1277,7 +1411,7 @@ static const StringRef initParametersI8GemmGfx908[nInitParametersI8GemmGfx908]; // END_GEMM_XDL_i8_gfx908_DECS // BEGIN_GEMM_XDL_i8_gfx90a_DECS -static constexpr size_t nInitParametersI8GemmGfx90a = 8; +static constexpr size_t nInitParametersI8GemmGfx90a = 25; static const StringRef initParametersI8GemmGfx90a[nInitParametersI8GemmGfx90a]; // END_GEMM_XDL_i8_gfx90a_DECS @@ -1302,7 +1436,7 @@ static const StringRef initParametersI8ConvGfx908[nInitParametersI8ConvGfx908]; // END_CONV_XDL_i8_gfx908_DECS // BEGIN_CONV_XDL_i8_gfx90a_DECS -static constexpr size_t nInitParametersI8ConvGfx90a = 9; +static constexpr size_t nInitParametersI8ConvGfx90a = 28; static const StringRef initParametersI8ConvGfx90a[nInitParametersI8ConvGfx90a]; // END_CONV_XDL_i8_gfx90a_DECS From 67bb9f011fc06bc828c788b73858d346d486fac2 Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Fri, 23 Jan 2026 00:29:36 +0000 Subject: [PATCH 04/10] Update gfx908 quick-tune lists for gemm and conv. --- .../Rock/Tuning/QuickTuningPerfconfigs.inc | 244 ++++++++++++------ 1 file changed, 171 insertions(+), 73 deletions(-) diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc index 0fd93f3beec6..0a64b90be349 100644 --- a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc +++ b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc @@ -332,14 +332,21 @@ static const StringRef initParametersF32ConvGfx1150[nInitParametersF32ConvGfx115 // BEGIN_GEMM_XDL_f32_gfx908_DEFS const StringRef PopulateParamsXDL::initParametersF32GemmGfx908[] = { - "v4:64,64,8,32,32,32,4,1,2,2,0,0,1,1", - "v4:64,64,4,32,32,32,4,1,2,2,0,0,1,1", - "v4:128,64,4,128,16,16,4,1,1,2,0,0,1,1", - "v4:32,32,4,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,32,8,32,16,16,4,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,2,2,0,0,1,1" + "v4:16,32,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:32,64,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,16,64,16,4,1,1,2,0,0,1,1", + "v4:64,128,4,32,32,32,4,1,2,2,0,0,1,1", + "v4:64,64,4,32,32,32,8,1,2,2,0,0,1,1", + "v4:128,64,4,32,32,32,4,1,2,2,0,0,1,1", + "v4:32,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:256,128,4,64,32,32,4,1,2,2,0,0,1,1", + "v4:64,128,8,32,32,32,1,1,2,2,0,0,1,1", + "v4:32,64,8,16,64,16,4,1,2,2,0,0,1,1", + "v4:64,64,4,32,16,16,8,1,2,2,0,0,1,1", + "v4:16,96,8,16,48,16,8,1,2,2,0,0,1,1", + "v4:128,96,8,16,48,16,4,1,2,2,0,0,1,1", + "v4:96,128,4,96,32,32,4,1,2,1,1,64,1,1", + "v4:96,64,8,96,16,16,4,1,1,0,1,32,1,1" }; // END_GEMM_XDL_f32_gfx908_DEFS @@ -400,24 +407,45 @@ const StringRef PopulateParamsXDL::initParametersF32GemmGfx950[] = { // BEGIN_CONV_XDL_f32_gfx908_DEFS const StringRef PopulateParamsXDL::initParametersF32ConvGfx908[] = { - "v4:32,32,8,16,16,16,4,1,1,2,0,0,1,1", - "v4:32,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,4,1,2,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,64,2,32,32,32,8,1,2,2,0,0,1,1", - "v4:64,64,8,32,32,32,4,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,32,8,16,32,16,4,1,2,2,0,0,1,1", + "v4:64,64,4,32,32,32,4,1,2,2,0,0,1,1", + "v4:64,128,4,32,32,32,4,1,2,2,0,0,1,1", + "v4:32,32,4,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,64,2,32,32,32,8,1,1,2,0,0,1,1", + "v4:64,128,4,64,32,32,4,1,1,2,0,0,1,1", + "v4:64,64,4,32,32,32,8,1,1,2,0,0,1,1", + "v4:128,128,2,128,32,32,4,1,2,2,0,0,1,1", "v4:64,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,64,4,64,16,16,4,1,1,2,0,0,1,1", - "v4:16,32,4,16,16,16,4,1,2,2,0,0,1,1", + "v4:32,64,8,16,32,16,4,1,2,2,0,0,1,1", + "v4:16,64,4,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,256,4,32,32,32,4,1,2,2,0,0,1,1", + "v4:16,32,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:16,64,4,16,16,16,4,1,2,2,0,0,1,1", + "v4:64,64,8,16,32,16,4,1,2,2,0,0,1,1", + "v4:64,32,8,16,32,16,4,1,2,2,0,0,1,1", + "v4:32,128,2,32,32,32,8,1,2,2,0,0,1,1", "v4:64,128,4,64,32,16,4,1,1,2,0,0,1,1", - "v4:128,64,4,64,32,32,4,1,2,2,0,0,1,1", - "v4:128,128,2,128,32,32,1,1,2,2,0,0,1,1", - "v4:64,32,2,64,32,32,1,1,1,2,0,0,1,1", - "v4:32,128,4,16,64,16,4,1,2,2,0,0,1,1", - "v4:64,64,2,64,32,32,1,1,2,2,0,0,1,1", - "v4:64,64,4,64,16,16,4,1,2,2,0,0,1,1" + "v4:16,128,4,16,32,16,4,1,2,2,0,0,1,1", + "v4:16,64,4,16,32,16,4,1,2,2,0,0,1,1", + "v4:64,32,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:16,64,4,16,64,16,4,1,1,2,0,0,1,1", + "v4:64,32,8,32,32,32,4,1,2,2,0,0,1,1", + "v4:16,32,8,16,32,16,4,1,2,2,0,0,1,1", + "v4:128,128,8,32,32,32,4,1,2,2,0,0,1,1", + "v4:32,128,8,32,32,16,4,1,2,2,0,0,1,1", + "v4:32,32,4,32,32,32,4,1,2,2,0,0,1,1", + "v4:64,128,4,32,32,32,8,1,2,2,0,0,1,1", + "v4:64,64,4,16,16,16,8,1,2,2,0,0,1,1", + "v4:128,64,8,32,32,32,4,1,2,2,0,0,1,1", + "v4:32,256,4,32,32,32,4,1,2,2,0,0,1,1", + "v4:64,256,8,64,64,16,1,1,2,2,0,0,1,1", + "v4:128,256,2,64,32,32,4,1,1,2,0,0,1,1", + "v4:16,256,4,16,64,16,4,1,1,2,0,0,1,1", + "v4:256,128,2,256,32,32,1,1,2,2,0,0,1,1", + "v4:32,128,4,32,64,32,4,1,1,2,0,0,1,1", + "v4:32,256,4,32,64,32,4,1,1,2,0,0,1,1", + "v4:256,128,2,128,64,32,4,1,2,2,0,0,1,1", + "v4:32,128,4,32,16,16,4,1,2,1,4,0,1,1", + "v4:128,96,2,32,96,32,4,1,2,0,2,16,1,1" }; // END_CONV_XDL_f32_gfx908_DEFS @@ -605,23 +633,27 @@ const StringRef PopulateParamsXDL::initParametersF32ConvGfx950[] = { // BEGIN_GEMM_XDL_f16_gfx908_DEFS const StringRef PopulateParamsXDL::initParametersF16GemmGfx908[] = { - "v4:32,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:32,32,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,64,8,16,16,16,16,1,2,2,0,0,1,1", "v4:64,64,8,32,32,32,8,1,2,2,0,0,1,1", - "v4:32,64,8,32,16,16,8,1,1,2,0,0,1,1", - "v4:128,128,8,32,128,16,4,1,2,2,0,0,1,1", - "v4:64,128,4,32,64,32,8,1,2,2,0,0,1,1", - "v4:16,64,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,64,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:32,128,8,32,32,32,8,1,1,2,0,0,1,1", - "v4:128,64,4,64,32,32,8,1,1,2,0,0,1,1", - "v4:128,128,8,32,128,32,8,1,2,2,0,0,1,1", - "v4:128,128,8,32,128,32,4,1,1,2,0,0,1,1", - "v4:128,256,8,128,64,16,4,1,2,2,0,0,1,1", - "v4:128,256,8,64,128,32,4,1,1,2,0,0,1,1", - "v4:128,64,8,32,64,16,8,1,2,2,0,0,1,1", - "v4:64,128,4,64,32,32,8,1,1,2,0,0,1,1", - "v4:256,128,4,128,64,16,8,1,1,2,0,0,1,1" + "v4:64,128,8,32,32,32,8,1,1,2,0,0,1,1", + "v4:128,128,8,64,32,32,8,1,2,2,0,0,1,1", + "v4:64,128,8,32,32,32,8,1,2,2,0,0,1,1", + "v4:128,128,8,64,64,32,4,1,1,2,0,0,1,1", + "v4:128,256,8,32,64,32,8,1,1,2,0,0,1,1", + "v4:32,128,8,32,16,16,16,1,1,2,0,0,1,1", + "v4:128,128,8,128,16,16,8,1,1,2,0,0,1,1", + "v4:128,128,8,128,32,32,8,1,1,2,0,0,1,1", + "v4:256,256,8,256,16,16,8,1,1,2,0,0,1,1", + "v4:256,256,4,32,128,32,16,1,1,2,0,0,1,1", + "v4:64,32,8,32,32,16,16,1,2,2,0,0,1,1", + "v4:256,192,8,32,96,32,8,1,1,2,0,0,1,1", + "v4:128,128,4,64,64,32,8,1,1,0,1,64,1,1", + "v4:128,64,8,32,64,32,8,1,2,0,8,16,1,1", + "v4:160,32,4,80,16,16,16,1,1,1,1,0,1,1", + "v4:256,256,4,128,32,32,8,1,1,0,2,0,1,1", + "v4:32,16,4,16,16,16,16,1,2,0,0,16,1,1", + "v4:80,128,8,80,16,16,8,1,2,1,1,32,1,1" }; // END_GEMM_XDL_f16_gfx908_DEFS @@ -721,20 +753,64 @@ const StringRef PopulateParamsXDL::initParametersF16GemmGfx950[] = { // BEGIN_CONV_XDL_f16_gfx908_DEFS const StringRef PopulateParamsXDL::initParametersF16ConvGfx908[] = { - "v4:32,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,32,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:64,32,4,32,16,16,16,1,2,2,0,0,1,1", + "v4:64,16,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,64,8,32,32,32,8,1,1,2,0,0,1,1", + "v4:128,128,8,128,32,32,4,1,1,2,0,0,1,1", + "v4:64,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,64,4,32,32,32,16,1,2,2,0,0,1,1", + "v4:128,256,2,128,64,32,4,1,1,2,0,0,1,1", + "v4:64,128,8,64,32,32,4,1,1,2,0,0,1,1", + "v4:16,32,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,64,8,16,16,16,16,1,2,2,0,0,1,1", "v4:64,64,4,32,32,32,8,1,2,2,0,0,1,1", - "v4:64,128,8,64,32,32,8,1,2,2,0,0,1,1", - "v4:64,128,8,32,64,32,4,1,1,2,0,0,1,1", - "v4:32,128,8,32,32,32,8,1,2,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,64,8,16,64,16,8,1,1,2,0,0,1,1", - "v4:128,128,4,64,64,32,8,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,64,4,16,16,16,8,1,1,2,0,0,1,1", + "v4:16,32,4,16,32,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,32,32,32,8,1,2,2,0,0,1,1", + "v4:128,128,4,128,32,32,8,1,1,2,0,0,1,1", + "v4:32,128,8,32,32,32,4,1,2,2,0,0,1,1", + "v4:128,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:16,16,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,32,4,32,16,16,8,1,2,2,0,0,1,1", "v4:32,256,2,32,64,32,4,1,2,2,0,0,1,1", - "v4:128,256,2,128,64,32,4,1,1,2,0,0,1,1", - "v4:64,32,2,64,32,32,4,1,1,2,0,0,1,1" + "v4:128,128,2,64,32,32,8,1,2,2,0,0,1,1", + "v4:32,32,8,32,32,16,8,1,2,2,0,0,1,1", + "v4:128,128,2,64,64,32,16,1,1,2,0,0,1,1", + "v4:32,64,4,32,16,16,8,1,2,2,0,0,1,1", + "v4:64,64,4,32,16,16,4,1,2,2,0,0,1,1", + "v4:128,128,8,32,64,32,8,1,1,2,0,0,1,1", + "v4:128,32,8,32,32,32,8,1,2,2,0,0,1,1", + "v4:128,64,8,64,32,32,4,1,2,2,0,0,1,1", + "v4:64,64,4,64,32,32,8,1,1,2,0,0,1,1", + "v4:128,128,4,64,32,32,4,1,1,2,0,0,1,1", + "v4:128,32,4,32,32,32,16,1,1,2,0,0,1,1", + "v4:16,64,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:128,128,2,32,32,32,4,1,2,2,0,0,1,1", + "v4:128,256,4,64,32,32,16,1,1,2,0,0,1,1", + "v4:128,48,8,16,48,16,4,1,2,2,0,0,1,1", + "v4:256,64,4,128,32,32,8,1,1,2,0,0,1,1", + "v4:64,64,4,16,16,16,4,1,2,2,0,0,1,1", + "v4:256,256,2,64,64,32,16,1,2,2,0,0,1,1", + "v4:128,256,8,128,64,32,4,1,1,2,0,0,1,1", + "v4:64,32,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:256,64,4,64,32,32,8,1,2,2,0,0,1,1", + "v4:64,256,2,64,32,32,16,1,2,2,0,0,1,1", + "v4:128,64,2,128,32,32,16,1,1,2,0,0,1,1", + "v4:128,32,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:256,64,8,32,64,32,8,1,1,2,0,0,1,1", + "v4:256,32,8,16,32,16,4,1,2,2,0,0,1,1", + "v4:192,64,4,96,32,16,8,1,2,2,0,0,1,1", + "v4:64,32,4,64,32,32,16,1,1,2,0,0,1,1", + "v4:128,128,8,32,32,32,8,1,2,2,0,0,1,1", + "v4:256,256,8,64,64,32,8,1,1,2,0,0,1,1", + "v4:256,128,8,32,64,32,8,1,1,2,0,0,1,1", + "v4:192,256,2,96,32,32,16,1,2,2,0,0,1,1", + "v4:160,64,4,160,32,32,8,1,2,1,8,16,1,1", + "v4:96,128,4,48,32,16,16,1,2,2,0,0,1,1", + "v4:16,32,4,16,32,16,8,1,2,1,1,4,1,1", + "v4:128,256,4,64,256,32,8,1,1,0,0,0,1,1", + "v4:192,64,8,96,16,16,8,1,1,1,8,32,1,1", + "v4:256,192,4,32,96,32,4,1,2,0,4,0,1,1", + "v4:32,16,4,16,16,16,8,1,2,1,1,8,1,1" }; // END_CONV_XDL_f16_gfx908_DEFS @@ -1048,14 +1124,25 @@ const StringRef PopulateParamsXDL::initParametersFp8ConvGfx900[] = { // BEGIN_GEMM_XDL_i8_gfx908_DEFS const StringRef PopulateParamsXDL::initParametersI8GemmGfx908[] = { - "v4:64,32,4,16,32,16,16,1,2,2,0,0,1,1", - "v4:64,64,4,32,32,32,16,1,2,2,0,0,1,1", - "v4:128,64,16,128,16,16,4,1,1,2,0,0,1,1", - "v4:128,128,8,128,32,32,8,1,2,2,0,0,1,1", "v4:32,32,16,16,16,16,16,1,2,2,0,0,1,1", - "v4:32,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,64,32,32,16,16,8,1,1,2,0,0,1,1", - "v4:32,32,16,16,16,16,16,1,1,2,0,0,1,1" + "v4:32,64,16,16,16,16,16,1,1,2,0,0,1,1", + "v4:64,64,8,32,32,32,16,1,1,2,0,0,1,1", + "v4:32,32,32,16,16,16,16,1,1,2,0,0,1,1", + "v4:64,128,8,64,32,32,8,1,1,2,0,0,1,1", + "v4:128,128,16,128,32,32,4,1,1,2,0,0,1,1", + "v4:128,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,64,16,16,32,16,16,1,2,2,0,0,1,1", + "v4:32,128,32,16,16,16,8,1,1,2,0,0,1,1", + "v4:32,256,32,32,32,32,1,1,2,2,0,0,1,1", + "v4:256,64,16,128,32,32,1,1,2,2,0,0,1,1", + "v4:48,64,8,48,16,16,16,1,1,2,0,0,1,1", + "v4:64,128,16,16,128,16,16,1,2,2,0,0,1,1", + "v4:128,256,4,64,64,32,16,1,2,1,0,0,1,1", + "v4:128,32,4,32,32,32,16,1,2,1,0,0,1,1", + "v4:48,256,16,48,16,16,8,1,1,0,0,4,1,1", + "v4:64,128,32,64,32,32,1,1,1,0,4,0,1,1", + "v4:80,128,16,80,16,16,8,1,2,1,8,8,1,1", + "v4:96,128,16,48,16,16,16,1,1,1,2,16,1,1" }; // END_GEMM_XDL_i8_gfx908_DEFS @@ -1181,14 +1268,25 @@ const StringRef PopulateParamsXDL::initParametersFp4GemmGfx950[] = { // BEGIN_CONV_XDL_i8_gfx908_DEFS const StringRef PopulateParamsXDL::initParametersI8ConvGfx908[] = { - "v4:32,32,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,32,32,16,16,16,16,1,1,2,0,0,1,1", - "v4:16,16,8,16,16,16,16,1,2,2,0,0,1,1", - "v4:64,32,16,16,32,16,4,1,1,2,0,0,1,1", - "v4:64,16,16,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,64,4,16,64,16,4,1,1,2,0,0,1,1", - "v4:32,256,4,32,64,16,4,1,2,2,0,0,1,1", - "v4:128,32,8,128,32,32,1,1,1,2,0,0,1,1" + "v4:32,32,32,16,16,16,8,1,1,2,0,0,1,1", + "v4:128,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,16,16,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,16,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,128,16,32,32,32,8,1,1,2,0,0,1,1", + "v4:128,32,8,32,32,32,16,1,1,2,0,0,1,1", + "v4:64,256,4,64,64,32,4,1,1,2,0,0,1,1", + "v4:128,64,8,64,32,32,8,1,1,2,0,0,1,1", + "v4:64,64,32,16,16,16,16,1,1,2,0,0,1,1", + "v4:128,128,8,128,32,32,8,1,1,2,0,0,1,1", + "v4:128,64,16,64,32,32,4,1,1,2,0,0,1,1", + "v4:128,32,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:256,128,8,256,32,32,4,1,1,2,0,0,1,1", + "v4:64,64,4,16,16,16,4,1,2,2,0,0,1,1", + "v4:256,128,8,64,32,32,16,1,1,2,0,0,1,1", + "v4:192,16,16,48,16,16,8,1,2,2,0,0,1,1", + "v4:256,128,16,64,32,32,4,1,1,2,0,0,1,1", + "v4:256,64,8,32,32,32,16,1,1,2,0,0,1,1", + "v4:128,256,32,64,64,16,1,1,2,2,0,0,1,1" }; // END_CONV_XDL_i8_gfx908_DEFS @@ -1316,7 +1414,7 @@ const StringRef PopulateParamsXDL::initParametersI8ConvGfx950[] = { #ifdef XDL_DECLARATIONS_GEN // BEGIN_GEMM_XDL_f32_gfx908_DECS -static constexpr size_t nInitParametersF32GemmGfx908 = 8; +static constexpr size_t nInitParametersF32GemmGfx908 = 15; static const StringRef initParametersF32GemmGfx908[nInitParametersF32GemmGfx908]; // END_GEMM_XDL_f32_gfx908_DECS @@ -1336,7 +1434,7 @@ static const StringRef initParametersF32GemmGfx950[nInitParametersF32GemmGfx950] // END_GEMM_XDL_f32_gfx950_DECS // BEGIN_CONV_XDL_f32_gfx908_DECS -static constexpr size_t nInitParametersF32ConvGfx908 = 18; +static constexpr size_t nInitParametersF32ConvGfx908 = 39; static const StringRef initParametersF32ConvGfx908[nInitParametersF32ConvGfx908]; // END_CONV_XDL_f32_gfx908_DECS @@ -1356,7 +1454,7 @@ static const StringRef initParametersF32ConvGfx950[nInitParametersF32ConvGfx950] // END_CONV_XDL_f32_gfx950_DECS // BEGIN_GEMM_XDL_f16_gfx908_DECS -static constexpr size_t nInitParametersF16GemmGfx908 = 17; +static constexpr size_t nInitParametersF16GemmGfx908 = 21; static const StringRef initParametersF16GemmGfx908[nInitParametersF16GemmGfx908]; // END_GEMM_XDL_f16_gfx908_DECS @@ -1376,7 +1474,7 @@ static const StringRef initParametersF16GemmGfx950[nInitParametersF16GemmGfx950] // END_GEMM_XDL_f16_gfx950_DECS // BEGIN_CONV_XDL_f16_gfx908_DECS -static constexpr size_t nInitParametersF16ConvGfx908 = 14; +static constexpr size_t nInitParametersF16ConvGfx908 = 58; static const StringRef initParametersF16ConvGfx908[nInitParametersF16ConvGfx908]; // END_CONV_XDL_f16_gfx908_DECS @@ -1406,7 +1504,7 @@ static const StringRef initParametersFp8ConvGfx900[nInitParametersFp8ConvGfx900] // END_CONV_XDL_fp8_gfx900_DECS // BEGIN_GEMM_XDL_i8_gfx908_DECS -static constexpr size_t nInitParametersI8GemmGfx908 = 8; +static constexpr size_t nInitParametersI8GemmGfx908 = 19; static const StringRef initParametersI8GemmGfx908[nInitParametersI8GemmGfx908]; // END_GEMM_XDL_i8_gfx908_DECS @@ -1431,7 +1529,7 @@ static const StringRef initParametersFp4GemmGfx950[nInitParametersFp4GemmGfx950] // END_GEMM_XDL_fp4_gfx950_DECS // BEGIN_CONV_XDL_i8_gfx908_DECS -static constexpr size_t nInitParametersI8ConvGfx908 = 8; +static constexpr size_t nInitParametersI8ConvGfx908 = 19; static const StringRef initParametersI8ConvGfx908[nInitParametersI8ConvGfx908]; // END_CONV_XDL_i8_gfx908_DECS From 06cbfe80336c0b2397f1ab4bad6695808c4cb1d1 Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Fri, 23 Jan 2026 00:32:23 +0000 Subject: [PATCH 05/10] Update gfx1201 quick-tune lists for gemm and conv. --- .../Rock/Tuning/QuickTuningPerfconfigs.inc | 459 ++++++++++++++---- 1 file changed, 355 insertions(+), 104 deletions(-) diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc index 0a64b90be349..626f0514ffc9 100644 --- a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc +++ b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc @@ -102,50 +102,49 @@ const StringRef PopulateParams::initParametersF32ConvGfx1100[] = { // BEGIN_GEMM_NonAccel_f32_gfx1201_DEFS const StringRef PopulateParams::initParametersF32GemmGfx1201[] = { - "v3:128,128,128,16,2,2,1,1,2", + "v3:64,64,128,4,2,4,1,1,2", + "v3:128,128,128,8,2,4,1,1,2", "v3:64,128,64,16,2,2,1,1,2", - "v3:64,64,32,16,2,2,1,1,2", - "v3:128,128,128,4,4,2,1,1,2", - "v3:64,64,64,16,4,4,1,1,2", - "v3:64,64,128,16,4,4,1,1,2", - "v3:64,64,64,8,2,4,1,1,2", - "v3:128,128,64,8,2,4,1,1,2", "v3:128,32,32,16,2,2,1,1,2", - "v3:64,32,32,8,2,2,1,1,2" + "v3:64,64,128,16,2,2,1,1,2", + "v3:64,128,64,16,4,4,1,1,2", + "v3:64,128,64,4,2,4,1,1,2", + "v3:64,32,64,8,2,4,1,1,2", + "v3:64,32,128,8,2,2,1,1,2", + "v3:64,64,64,8,2,4,1,1,2", + "v3:128,64,64,8,2,2,1,1,2", + "v3:256,64,64,8,4,2,1,1,2" }; // END_GEMM_NonAccel_f32_gfx1201_DEFS // BEGIN_CONV_NonAccel_f32_gfx1201_DEFS const StringRef PopulateParams::initParametersF32ConvGfx1201[] = { - "v3:64,64,128,4,2,4,1,1,2", - "v3:64,64,128,8,2,4,1,1,2", + "v3:64,64,128,4,2,2,1,1,2", + "v3:128,64,64,8,2,2,1,1,2", + "v3:64,64,128,8,2,2,1,1,2", + "v3:64,64,64,16,2,2,1,1,2", + "v3:64,64,64,16,2,4,1,1,2", "v3:64,64,64,4,4,4,1,1,2", - "v3:64,128,64,16,2,2,1,1,2", - "v3:64,64,128,8,4,4,1,1,2", + "v3:64,32,128,8,2,2,1,1,2", "v3:64,64,128,16,2,2,1,1,2", - "v3:64,64,64,4,2,2,1,1,2", - "v3:64,64,128,16,2,4,1,1,2", - "v3:64,64,64,16,2,2,1,1,2", - "v3:64,32,128,4,2,2,1,1,2", - "v3:128,128,128,16,2,2,1,1,2", - "v3:128,128,128,16,4,2,1,1,2", - "v3:128,128,128,8,2,2,1,1,2", - "v3:64,64,32,8,2,2,1,1,2", - "v3:64,64,32,16,2,4,1,1,2", - "v3:128,128,64,8,4,4,1,1,2", - "v3:64,32,64,8,2,4,1,1,2", - "v3:64,64,64,16,4,4,1,1,2", - "v3:128,64,64,8,2,4,1,1,2", + "v3:64,32,32,8,2,2,1,1,2", + "v3:64,32,64,8,4,4,1,1,2", + "v3:128,128,128,16,2,4,1,1,2", "v3:128,128,128,4,2,2,1,1,2", - "v3:128,128,32,16,2,4,1,1,2", + "v3:64,64,128,16,2,4,1,1,2", "v3:128,32,32,16,2,4,1,1,2", - "v3:64,128,64,8,4,2,1,1,2", - "v3:64,32,32,8,2,2,1,1,2", + "v3:64,32,64,4,4,2,1,1,2", "v3:256,128,128,8,4,2,1,1,2", - "v3:128,64,32,16,2,2,1,1,2", - "v3:128,128,64,16,2,4,1,1,2", - "v3:256,128,128,16,4,4,1,1,2", - "v3:256,32,32,16,2,2,1,1,2" + "v3:128,64,128,8,4,4,1,1,2", + "v3:64,32,128,4,4,2,1,1,2", + "v3:128,128,128,8,2,4,1,1,2", + "v3:64,128,64,16,4,4,1,1,2", + "v3:64,32,32,16,2,2,1,1,2", + "v3:128,128,64,8,2,4,1,1,2", + "v3:64,64,32,8,2,2,1,1,2", + "v3:256,128,128,16,2,4,1,1,2", + "v3:128,128,64,8,4,4,1,1,2", + "v3:128,128,32,16,2,2,1,1,2" }; // END_CONV_NonAccel_f32_gfx1201_DEFS @@ -297,12 +296,12 @@ static const StringRef initParametersF32ConvGfx1100[nInitParametersF32ConvGfx110 // END_CONV_NonAccel_f32_gfx1100_DECS // BEGIN_GEMM_NonAccel_f32_gfx1201_DECS -static constexpr size_t nInitParametersF32GemmGfx1201 = 10; +static constexpr size_t nInitParametersF32GemmGfx1201 = 12; static const StringRef initParametersF32GemmGfx1201[nInitParametersF32GemmGfx1201]; // END_GEMM_NonAccel_f32_gfx1201_DECS // BEGIN_CONV_NonAccel_f32_gfx1201_DECS -static constexpr size_t nInitParametersF32ConvGfx1201 = 29; +static constexpr size_t nInitParametersF32ConvGfx1201 = 26; static const StringRef initParametersF32ConvGfx1201[nInitParametersF32ConvGfx1201]; // END_CONV_NonAccel_f32_gfx1201_DECS @@ -1782,96 +1781,348 @@ const StringRef PopulateParamsWmma::initParametersI8ConvGfx1100[] = { // BEGIN_GEMM_Wmma_f16_gfx1201_DEFS const StringRef PopulateParamsWmma::initParametersF16GemmGfx1201[] = { - "v4:128,64,4,64,32,16,8,1,1,2,0,0,1,1", - "v4:128,128,8,32,128,16,8,1,1,2,0,0,1,1", - "v4:32,64,8,16,32,16,8,1,2,2,0,0,1,1", - "v4:64,128,2,32,64,16,8,1,2,2,0,0,1,1", - "v4:16,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:128,256,4,64,64,16,8,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,16,1,2,2,0,0,1,1", - "v4:128,128,4,128,32,16,8,1,1,2,0,0,1,1", - "v4:64,64,8,32,32,16,8,1,1,2,0,0,1,1", - "v4:32,64,8,32,16,16,8,1,1,2,0,0,1,1", - "v4:32,128,4,32,32,16,8,1,2,2,0,0,1,1", - "v4:256,64,4,32,64,16,8,1,1,2,0,0,1,1", + "v4:128,128,2,32,128,16,8,1,1,2,0,0,1,1", + "v4:128,128,4,32,64,16,8,1,1,2,0,0,1,1", + "v4:256,128,4,32,128,16,8,1,2,2,0,0,1,1", + "v4:128,128,4,128,32,16,8,1,2,2,0,0,1,1", + "v4:128,128,4,32,128,16,8,1,2,2,0,0,1,1", + "v4:64,64,4,32,64,16,8,1,1,2,0,0,1,1", + "v4:16,32,8,16,32,16,16,1,2,2,0,0,1,1", "v4:256,128,4,128,32,16,8,1,2,2,0,0,1,1", - "v4:64,64,8,64,32,16,8,1,1,2,0,0,1,1" + "v4:64,64,8,32,32,16,8,1,1,2,0,0,1,1", + "v4:128,128,4,64,64,16,4,1,1,2,0,0,1,1", + "v4:16,32,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:16,128,8,16,32,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,64,32,16,8,1,2,2,0,0,1,1", + "v4:96,32,8,48,32,16,8,1,1,2,0,0,1,1", + "v4:128,128,4,64,64,16,8,1,1,0,8,0,1,1", + "v4:80,64,8,80,16,16,8,1,1,2,0,0,1,1", + "v4:128,32,8,64,16,16,8,1,1,2,0,0,1,1", + "v4:16,64,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:64,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,16,8,64,16,16,16,1,2,2,0,0,1,1", + "v4:128,64,8,32,64,16,16,1,1,2,0,0,1,1", + "v4:16,64,4,16,64,16,16,1,1,2,0,0,1,1", + "v4:256,256,4,64,64,16,8,1,2,2,0,0,1,1", + "v4:128,128,2,128,32,16,16,1,2,2,0,0,1,1", + "v4:16,64,4,16,64,16,8,1,2,2,0,0,1,1", + "v4:16,32,4,16,32,16,16,1,2,2,0,0,1,1", + "v4:64,32,2,64,32,16,16,1,2,2,0,0,1,1", + "v4:64,64,4,64,64,16,16,1,1,2,0,0,1,1", + "v4:96,192,4,96,48,16,4,1,2,2,0,0,1,1", + "v4:128,128,2,64,128,16,8,1,1,2,0,0,1,1", + "v4:128,16,4,128,16,16,16,1,2,2,0,0,1,1", + "v4:128,256,4,64,128,16,4,1,2,2,0,0,1,1", + "v4:16,64,8,16,64,16,4,1,2,2,0,0,1,1", + "v4:192,64,4,48,32,16,8,1,2,2,0,0,1,1", + "v4:256,128,4,128,64,16,8,1,2,2,0,0,1,1", + "v4:256,256,4,256,32,16,4,1,2,2,0,0,1,1", + "v4:128,128,8,128,16,16,8,1,2,2,0,0,1,1", + "v4:128,160,8,32,160,16,4,1,1,2,0,0,1,1", + "v4:128,64,4,128,32,16,8,1,2,2,0,0,1,1", + "v4:16,128,4,16,128,16,4,1,1,2,0,0,1,1", + "v4:16,64,8,16,64,16,16,1,2,2,0,0,1,1", + "v4:256,128,2,128,64,16,16,1,2,2,0,0,1,1", + "v4:256,128,8,128,64,16,8,1,1,2,0,0,1,1", + "v4:256,256,8,128,32,16,4,1,2,2,0,0,1,1", + "v4:64,128,8,32,128,16,8,1,1,2,0,0,1,1", + "v4:64,256,2,64,128,16,16,1,1,2,0,0,1,1", + "v4:128,128,8,32,128,16,8,1,2,2,0,0,1,1", + "v4:128,32,2,64,32,16,16,1,2,2,0,0,1,1", + "v4:128,32,8,128,16,16,16,1,1,2,0,0,1,1", + "v4:16,96,4,16,96,16,16,1,2,2,0,0,1,1", + "v4:48,48,4,48,48,16,8,1,2,2,0,0,1,1", + "v4:64,16,8,64,16,16,4,1,2,2,0,0,1,1", + "v4:64,256,4,32,256,16,4,1,1,2,0,0,1,1", + "v4:64,256,8,64,64,16,8,1,1,2,0,0,1,1", + "v4:128,64,2,128,64,16,16,1,2,2,0,0,1,1", + "v4:32,64,8,16,32,16,8,1,2,1,0,16,1,1", + "v4:128,128,4,128,64,16,16,1,1,2,0,0,1,1", + "v4:128,128,8,128,64,16,16,1,2,2,0,0,1,1", + "v4:128,16,8,128,16,16,16,1,1,2,0,0,1,1", + "v4:192,96,4,48,96,16,16,1,1,2,0,0,1,1", + "v4:32,256,2,32,256,16,16,1,1,2,0,0,1,1", + "v4:64,128,8,32,128,16,16,1,1,2,0,0,1,1", + "v4:96,64,8,96,16,16,8,1,1,2,0,0,1,1", + "v4:240,32,8,240,16,16,8,1,2,2,0,0,1,1", + "v4:256,32,8,256,32,16,4,1,1,2,0,0,1,1" }; // END_GEMM_Wmma_f16_gfx1201_DEFS // BEGIN_GEMM_Wmma_i8_gfx1201_DEFS const StringRef PopulateParamsWmma::initParametersI8GemmGfx1201[] = { - "v4:128,64,4,64,32,16,16,1,1,2,0,0,1,1", - "v4:32,32,8,16,16,16,16,1,2,2,0,0,1,1", - "v4:128,128,4,64,64,16,16,1,1,2,0,0,1,1", - "v4:256,64,4,32,64,16,16,1,1,2,0,0,1,1", - "v4:128,32,4,32,32,16,16,1,1,2,0,0,1,1", - "v4:128,256,8,64,64,16,8,1,1,2,0,0,1,1", - "v4:64,32,8,32,16,16,16,1,1,2,0,0,1,1", + "v4:64,32,8,16,32,16,16,1,2,2,0,0,1,1", "v4:256,128,4,128,32,16,16,1,1,2,0,0,1,1", - "v4:16,128,8,16,32,16,16,1,1,2,0,0,1,1", + "v4:256,128,4,32,128,16,16,1,2,2,0,0,1,1", + "v4:192,64,4,96,32,16,16,1,1,2,0,0,1,1", "v4:256,128,8,64,64,16,8,1,2,2,0,0,1,1", - "v4:128,256,2,128,64,16,8,1,1,2,0,0,1,1", - "v4:256,256,8,64,32,16,8,1,2,2,0,0,1,1", - "v4:64,64,8,32,32,16,16,1,1,2,0,0,1,1" + "v4:16,32,8,16,32,16,16,1,1,2,0,0,1,1", + "v4:128,64,4,128,16,16,16,1,2,2,0,0,1,1", + "v4:128,128,8,128,32,16,8,1,2,2,0,0,1,1", + "v4:256,128,4,128,32,16,16,1,2,2,0,0,1,1", + "v4:256,128,4,64,64,16,16,1,2,2,0,0,1,1", + "v4:256,64,8,128,16,16,16,1,1,2,0,0,1,1", + "v4:128,128,4,32,64,16,16,1,1,2,0,0,1,1", + "v4:160,128,4,160,16,16,16,1,2,2,0,0,1,1", + "v4:256,128,4,64,32,16,16,1,2,2,0,0,1,1", + "v4:64,256,4,64,32,16,16,1,2,2,0,0,1,1", + "v4:128,64,4,64,64,16,16,1,1,2,0,0,1,1", + "v4:64,64,4,64,64,16,16,1,1,2,0,0,1,1", + "v4:128,128,8,16,128,16,16,1,2,2,0,0,1,1", + "v4:128,128,8,32,64,16,16,1,2,2,0,0,1,1", + "v4:128,256,4,64,64,16,16,1,2,2,0,0,1,1", + "v4:256,32,4,256,32,16,16,1,1,2,0,0,1,1", + "v4:64,64,8,32,64,16,16,1,2,2,0,0,1,1", + "v4:128,128,8,32,128,16,16,1,2,2,0,0,1,1", + "v4:128,16,8,128,16,16,16,1,2,2,0,0,1,1", + "v4:16,128,8,16,128,16,16,1,2,2,0,0,1,1", + "v4:160,256,8,160,32,16,8,1,1,2,0,0,1,1", + "v4:256,64,4,256,16,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,64,128,16,16,1,1,2,0,0,1,1", + "v4:64,64,8,32,64,16,16,1,1,2,0,0,1,1", + "v4:128,16,4,64,16,16,16,1,2,2,0,0,1,1", + "v4:128,256,8,128,64,16,4,1,1,2,0,0,1,1", + "v4:128,256,8,32,256,16,4,1,1,2,0,0,1,1", + "v4:128,32,2,128,32,16,16,1,2,2,0,0,1,1", + "v4:128,32,4,128,32,16,8,1,2,2,0,0,1,1", + "v4:16,112,8,16,112,16,16,1,2,2,0,0,1,1", + "v4:192,192,8,48,192,16,16,1,1,2,0,0,1,1", + "v4:224,64,4,112,32,16,16,1,2,2,0,0,1,1", + "v4:256,128,2,64,128,16,16,1,2,2,0,0,1,1", + "v4:256,128,4,64,128,16,16,1,2,2,0,0,1,1", + "v4:256,128,8,256,16,16,8,1,2,2,0,0,1,1", + "v4:256,16,4,128,16,16,16,1,2,2,0,0,1,1", + "v4:256,256,4,128,128,16,8,1,1,2,0,0,1,1", + "v4:256,32,2,256,32,16,8,1,2,2,0,0,1,1", + "v4:112,112,2,112,112,16,16,1,2,2,0,0,1,1", + "v4:128,256,2,32,256,16,8,1,1,2,0,0,1,1", + "v4:128,256,4,16,256,16,8,1,2,2,0,0,1,1", + "v4:16,80,4,16,80,16,16,1,1,2,0,0,1,1", + "v4:224,16,8,224,16,16,16,1,2,2,0,0,1,1", + "v4:240,192,4,240,48,16,16,1,2,2,0,0,1,1", + "v4:256,128,4,64,64,16,16,1,1,1,8,8,1,1", + "v4:256,128,8,32,128,16,4,1,2,2,0,0,1,1", + "v4:256,256,4,128,128,16,4,1,2,2,0,0,1,1", + "v4:256,256,8,64,128,16,16,1,1,2,0,0,1,1", + "v4:64,128,8,16,128,16,4,1,1,2,0,0,1,1", + "v4:64,64,8,64,64,16,8,1,2,2,0,0,1,1", + "v4:112,32,8,112,16,16,4,1,2,2,0,0,1,1", + "v4:192,32,8,96,16,16,16,1,2,1,0,8,1,1", + "v4:48,64,8,48,16,16,16,1,1,0,0,8,1,1", + "v4:64,256,8,64,256,16,16,1,1,2,0,0,1,1" }; // END_GEMM_Wmma_i8_gfx1201_DEFS // BEGIN_CONV_Wmma_f16_gfx1201_DEFS const StringRef PopulateParamsWmma::initParametersF16ConvGfx1201[] = { - "v4:128,128,4,128,32,16,8,1,1,2,0,0,1,1", - "v4:128,64,4,32,64,16,8,1,1,2,0,0,1,1", - "v4:128,64,8,32,64,16,8,1,1,2,0,0,1,1", - "v4:64,256,2,64,64,16,8,1,1,2,0,0,1,1", - "v4:128,32,8,32,32,16,8,1,1,2,0,0,1,1", - "v4:64,32,8,32,16,16,8,1,1,2,0,0,1,1", - "v4:128,128,2,128,32,16,8,1,1,2,0,0,1,1", + "v4:64,256,2,64,64,16,8,1,2,2,0,0,1,1", + "v4:160,128,8,80,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,4,64,64,16,8,1,1,2,0,0,1,1", "v4:128,128,8,32,64,16,8,1,1,2,0,0,1,1", - "v4:256,128,4,64,64,16,8,1,1,2,0,0,1,1", - "v4:64,128,2,64,32,16,8,1,1,2,0,0,1,1", - "v4:256,64,4,128,32,16,8,1,1,2,0,0,1,1", + "v4:128,32,8,64,16,16,8,1,1,2,0,0,1,1", + "v4:256,128,8,64,64,16,8,1,1,2,0,0,1,1", + "v4:64,256,4,64,32,16,8,1,1,2,0,0,1,1", + "v4:192,256,8,96,32,16,8,1,1,2,0,0,1,1", "v4:128,16,8,32,16,16,8,1,1,2,0,0,1,1", - "v4:256,128,8,128,32,16,8,1,1,2,0,0,1,1", - "v4:32,64,8,32,16,16,8,1,2,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:64,32,8,32,16,16,8,1,1,2,0,0,1,1", + "v4:128,64,4,32,64,16,8,1,2,2,0,0,1,1", + "v4:64,128,2,64,64,16,16,1,1,2,0,0,1,1", + "v4:128,128,4,64,64,16,8,1,2,2,0,0,1,1", + "v4:128,256,2,128,32,16,8,1,1,2,0,0,1,1", + "v4:256,128,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:64,64,8,32,32,16,8,1,2,2,0,0,1,1", + "v4:128,32,8,32,32,16,8,1,2,2,0,0,1,1", + "v4:192,256,4,192,32,16,8,1,1,2,0,0,1,1", + "v4:128,256,2,128,64,16,8,1,2,2,0,0,1,1", + "v4:64,128,4,32,128,16,4,1,1,2,0,0,1,1", + "v4:32,128,2,32,128,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,32,64,16,8,1,1,2,0,0,1,1", + "v4:128,128,4,32,128,16,8,1,2,2,0,0,1,1", "v4:16,64,4,16,64,16,8,1,1,2,0,0,1,1", - "v4:32,128,4,32,32,16,8,1,2,2,0,0,1,1", - "v4:128,128,2,32,128,16,8,1,1,2,0,0,1,1", - "v4:64,128,4,16,128,16,8,1,1,2,0,0,1,1", + "v4:64,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,128,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:128,16,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:32,256,2,32,32,16,8,1,2,2,0,0,1,1", + "v4:64,64,8,16,64,16,8,1,2,2,0,0,1,1", + "v4:128,128,4,128,32,16,8,1,2,2,0,0,1,1", + "v4:128,256,4,128,64,16,4,1,1,2,0,0,1,1", + "v4:256,128,8,32,128,16,8,1,1,2,0,0,1,1", + "v4:128,256,4,64,64,16,8,1,2,2,0,0,1,1", + "v4:256,128,4,32,128,16,8,1,1,2,0,0,1,1", + "v4:256,128,4,128,32,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,16,64,16,8,1,2,2,0,0,1,1", "v4:16,64,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:256,256,8,128,32,16,8,1,1,2,0,0,1,1", + "v4:32,64,8,32,16,16,8,1,2,2,0,0,1,1", "v4:32,64,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:128,256,8,64,64,16,8,1,1,2,0,0,1,1", - "v4:32,128,2,32,128,16,8,1,1,2,0,0,1,1", - "v4:64,128,8,32,32,16,8,1,2,2,0,0,1,1", - "v4:64,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,128,8,16,32,16,8,1,2,2,0,0,1,1", - "v4:32,64,8,32,32,16,8,1,2,2,0,0,1,1", - "v4:64,64,4,64,16,16,8,1,1,2,0,0,1,1", - "v4:16,32,4,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,128,8,32,16,16,8,1,2,2,0,0,1,1", - "v4:32,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:64,64,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:32,32,4,16,16,16,16,1,2,2,0,0,1,1", - "v4:128,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,16,1,2,2,0,0,1,1" + "v4:64,128,8,64,16,16,8,1,2,2,0,0,1,1", + "v4:128,256,4,128,32,16,8,1,2,2,0,0,1,1", + "v4:128,256,4,64,128,16,4,1,2,2,0,0,1,1", + "v4:256,64,2,64,64,16,16,1,1,2,0,0,1,1", + "v4:64,32,8,64,16,16,8,1,1,2,0,0,1,1", + "v4:256,128,4,64,64,16,8,1,2,2,0,0,1,1", + "v4:64,128,2,32,64,16,16,1,2,2,0,0,1,1", + "v4:128,32,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,32,16,16,8,1,1,2,0,0,1,1", + "v4:128,128,2,128,32,16,16,1,2,2,0,0,1,1", + "v4:128,128,2,64,32,16,8,1,2,2,0,0,1,1", + "v4:128,64,8,32,32,16,8,1,2,2,0,0,1,1", + "v4:256,256,4,128,64,16,8,1,2,2,0,0,1,1", + "v4:256,256,4,64,128,16,4,1,2,2,0,0,1,1", + "v4:128,256,4,64,128,16,8,1,1,2,0,0,1,1", + "v4:16,64,4,16,64,16,16,1,1,2,0,0,1,1", + "v4:256,128,2,128,64,16,16,1,2,2,0,0,1,1", + "v4:32,48,8,16,48,16,8,1,2,2,0,0,1,1", + "v4:128,128,8,128,16,16,8,1,2,2,0,0,1,1", + "v4:128,256,8,32,128,16,4,1,2,2,0,0,1,1", + "v4:256,128,2,256,32,16,16,1,1,2,0,0,1,1", + "v4:256,128,4,64,128,16,4,1,2,2,0,0,1,1", + "v4:256,64,4,256,16,16,4,1,2,2,0,0,1,1", + "v4:128,128,4,32,128,16,16,1,1,2,0,0,1,1", + "v4:128,128,8,128,32,16,8,1,1,2,0,0,1,1", + "v4:128,256,4,128,64,16,8,1,2,2,0,0,1,1", + "v4:128,64,8,16,64,16,8,1,2,2,0,0,1,1", + "v4:128,64,8,32,64,16,8,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:160,80,2,160,80,16,8,1,1,2,0,0,1,1", + "v4:192,64,2,192,32,16,16,1,1,2,0,0,1,1", + "v4:256,32,8,32,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,32,128,16,4,1,2,2,0,0,1,1", + "v4:64,32,4,64,32,16,8,1,2,2,0,0,1,1", + "v4:128,64,8,128,16,16,4,1,2,2,0,0,1,1", + "v4:16,32,4,16,32,16,16,1,2,2,0,0,1,1", + "v4:256,128,8,64,64,16,4,1,2,2,0,0,1,1", + "v4:256,256,8,64,128,16,8,1,1,2,0,0,1,1", + "v4:16,128,8,16,64,16,8,1,2,2,0,0,1,1", + "v4:256,64,2,128,32,16,16,1,2,2,0,0,1,1", + "v4:32,32,4,32,32,16,16,1,1,2,0,0,1,1", + "v4:64,256,4,64,128,16,4,1,2,2,0,0,1,1", + "v4:64,256,4,64,128,16,8,1,1,2,0,0,1,1", + "v4:128,256,8,64,128,16,4,1,1,2,0,0,1,1", + "v4:16,48,8,16,48,16,8,1,2,2,0,0,1,1", + "v4:256,128,2,32,128,16,16,1,2,2,0,0,1,1", + "v4:256,48,8,16,48,16,8,1,1,1,4,64,1,1", + "v4:32,16,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,32,64,16,8,1,2,2,0,0,1,1", + "v4:80,64,8,80,16,16,8,1,1,0,2,16,1,1", + "v4:96,48,8,48,48,16,8,1,1,2,0,0,1,1", + "v4:128,64,4,128,64,16,4,1,1,2,0,0,1,1", + "v4:64,128,2,64,128,16,16,1,2,2,0,0,1,1", + "v4:64,64,8,32,32,16,16,1,2,2,0,0,1,1", + "v4:96,240,2,48,240,16,8,1,1,2,0,0,1,1", + "v4:128,160,8,32,160,16,4,1,1,2,0,0,1,1", + "v4:128,256,2,32,256,16,16,1,1,2,0,0,1,1", + "v4:16,256,4,16,256,16,4,1,1,2,0,0,1,1", + "v4:192,128,8,192,64,16,4,1,1,2,0,0,1,1", + "v4:256,64,8,64,64,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,32,32,16,16,1,1,2,0,0,1,1", + "v4:64,16,4,64,16,16,8,1,2,2,0,0,1,1", + "v4:128,32,4,64,32,16,16,1,1,2,0,0,1,1", + "v4:192,96,8,48,48,16,8,1,1,2,0,0,1,1", + "v4:32,32,4,32,16,16,16,1,2,2,0,0,1,1", + "v4:64,128,2,64,32,16,8,1,2,1,0,4,1,1", + "v4:64,192,2,64,96,16,16,1,2,2,0,0,1,1", + "v4:96,32,8,48,32,16,8,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:192,256,2,192,64,16,8,1,1,2,0,0,1,1", + "v4:224,32,2,112,32,16,16,1,2,2,0,0,1,1", + "v4:32,240,4,16,240,16,16,1,2,2,0,0,1,1", + "v4:64,16,8,32,16,16,16,1,2,2,0,0,1,1", + "v4:64,32,4,64,16,16,16,1,2,2,0,0,1,1", + "v4:128,128,4,128,64,16,16,1,2,2,0,0,1,1", + "v4:128,128,8,64,64,16,8,1,1,0,8,32,1,1", + "v4:128,160,4,32,160,16,8,1,1,2,0,0,1,1", + "v4:16,80,8,16,80,16,16,1,1,2,0,0,1,1", + "v4:192,32,8,96,16,16,16,1,2,2,0,0,1,1", + "v4:192,32,8,96,16,16,8,1,2,2,0,0,1,1", + "v4:256,64,8,256,16,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,32,128,16,16,1,1,2,0,0,1,1", + "v4:64,64,8,32,16,16,16,1,2,2,0,0,1,1", + "v4:96,64,4,48,16,16,16,1,2,2,0,0,1,1", + "v4:112,16,4,112,16,16,8,1,2,2,0,0,1,1", + "v4:128,128,4,128,64,16,16,1,1,2,0,0,1,1", + "v4:128,64,8,32,64,16,8,1,1,1,2,4,1,1", + "v4:128,64,8,64,64,16,16,1,1,2,0,0,1,1", + "v4:16,160,4,16,160,16,16,1,1,2,0,0,1,1", + "v4:160,160,2,80,160,16,16,1,1,0,4,4,1,1", + "v4:160,32,8,80,32,16,8,1,1,0,16,8,1,1", + "v4:32,128,8,32,16,16,8,1,2,1,0,4,1,1", + "v4:32,16,8,32,16,16,16,1,2,0,0,16,1,1", + "v4:32,256,4,32,256,16,8,1,1,2,0,0,1,1", + "v4:48,128,8,48,16,16,8,1,2,1,16,16,1,1", + "v4:64,128,8,16,64,16,16,1,1,2,0,0,1,1", + "v4:64,224,4,32,224,16,16,1,2,2,0,0,1,1", + "v4:64,256,2,64,128,16,8,1,1,1,0,4,1,1", + "v4:64,256,8,16,256,16,8,1,2,2,0,0,1,1", + "v4:64,64,8,16,16,16,8,1,2,0,2,16,1,1", + "v4:96,32,8,48,16,16,8,1,1,2,0,0,1,1", + "v4:96,32,8,96,32,16,16,1,2,2,0,0,1,1" }; // END_CONV_Wmma_f16_gfx1201_DEFS // BEGIN_CONV_Wmma_i8_gfx1201_DEFS const StringRef PopulateParamsWmma::initParametersI8ConvGfx1201[] = { - "v4:128,64,4,64,32,16,16,1,1,2,0,0,1,1", - "v4:256,64,4,32,64,16,16,1,2,2,0,0,1,1", - "v4:32,64,2,32,32,16,16,1,2,2,0,0,1,1", - "v4:64,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:128,256,4,64,64,16,16,1,1,2,0,0,1,1", + "v4:128,64,4,32,64,16,16,1,2,2,0,0,1,1", + "v4:256,32,8,32,32,16,16,1,1,2,0,0,1,1", + "v4:256,64,8,32,64,16,16,1,1,2,0,0,1,1", "v4:64,16,8,16,16,16,16,1,2,2,0,0,1,1", - "v4:128,16,8,32,16,16,16,1,1,2,0,0,1,1", - "v4:128,128,4,64,64,16,8,1,2,2,0,0,1,1", - "v4:128,128,4,64,64,16,16,1,1,2,0,0,1,1", - "v4:256,32,8,64,32,16,16,1,1,2,0,0,1,1", - "v4:32,256,2,32,128,16,8,1,1,2,0,0,1,1", - "v4:64,128,2,64,32,16,8,1,1,2,0,0,1,1", - "v4:256,128,8,32,128,16,8,1,1,2,0,0,1,1" + "v4:128,64,4,128,32,16,8,1,2,2,0,0,1,1", + "v4:256,128,8,32,64,16,16,1,1,2,0,0,1,1", + "v4:64,128,2,64,128,16,8,1,1,2,0,0,1,1", + "v4:64,128,4,32,128,16,8,1,1,2,0,0,1,1", + "v4:128,256,8,128,64,16,4,1,1,2,0,0,1,1", + "v4:64,128,8,32,32,16,4,1,2,2,0,0,1,1", + "v4:64,64,8,64,64,16,8,1,1,2,0,0,1,1", + "v4:96,128,4,96,32,16,16,1,2,2,0,0,1,1", + "v4:128,128,8,32,64,16,16,1,2,2,0,0,1,1", + "v4:128,256,4,128,64,16,8,1,2,2,0,0,1,1", + "v4:256,64,4,256,16,16,8,1,2,2,0,0,1,1", + "v4:256,64,8,128,16,16,16,1,1,2,0,0,1,1", + "v4:128,16,4,128,16,16,16,1,1,2,0,0,1,1", + "v4:128,32,8,128,16,16,16,1,1,2,0,0,1,1", + "v4:192,64,2,192,32,16,16,1,1,2,0,0,1,1", + "v4:256,256,4,256,64,16,8,1,2,2,0,0,1,1", + "v4:32,128,4,32,64,16,16,1,1,2,0,0,1,1", + "v4:128,256,8,64,128,16,4,1,1,2,0,0,1,1", + "v4:128,32,8,64,32,16,16,1,2,2,0,0,1,1", + "v4:160,64,4,160,32,16,8,1,2,2,0,0,1,1", + "v4:256,128,2,64,128,16,16,1,2,2,0,0,1,1", + "v4:256,128,8,256,32,16,4,1,2,2,0,0,1,1", + "v4:256,128,8,64,64,16,8,1,2,2,0,0,1,1", + "v4:64,64,4,64,64,16,16,1,2,2,0,0,1,1", + "v4:128,16,4,128,16,16,16,1,2,2,0,0,1,1", + "v4:128,16,4,64,16,16,16,1,2,2,0,0,1,1", + "v4:128,16,8,64,16,16,16,1,2,2,0,0,1,1", + "v4:128,256,4,64,128,16,16,1,1,2,0,0,1,1", + "v4:128,32,4,128,32,16,16,1,1,2,0,0,1,1", + "v4:128,32,8,128,16,16,8,1,1,2,0,0,1,1", + "v4:256,128,4,64,64,16,16,1,2,2,0,0,1,1", + "v4:256,32,8,256,16,16,16,1,2,2,0,0,1,1", + "v4:256,64,2,256,64,16,8,1,2,2,0,0,1,1", + "v4:64,64,8,32,64,16,16,1,1,2,0,0,1,1", + "v4:128,128,8,64,64,16,16,1,1,2,0,0,1,1", + "v4:128,96,8,32,48,16,16,1,1,2,0,0,1,1", + "v4:256,16,8,64,16,16,16,1,1,2,0,0,1,1", + "v4:256,256,8,128,128,16,4,1,1,2,0,0,1,1", + "v4:32,128,8,16,64,16,16,1,2,2,0,0,1,1", + "v4:64,256,4,32,128,16,16,1,1,2,0,0,1,1", + "v4:96,192,8,96,96,16,8,1,2,2,0,0,1,1", + "v4:128,256,2,32,128,16,8,1,2,2,0,0,1,1", + "v4:128,256,8,64,128,16,8,1,1,2,0,0,1,1", + "v4:128,96,8,32,96,16,4,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:192,48,4,192,48,16,4,1,2,2,0,0,1,1", + "v4:256,128,4,64,128,16,8,1,2,2,0,0,1,1", + "v4:256,16,8,128,16,16,8,1,1,2,0,0,1,1", + "v4:128,16,4,128,16,16,8,1,2,2,0,0,1,1", + "v4:128,256,8,128,64,16,8,1,2,0,4,0,1,1", + "v4:16,128,8,16,128,16,8,1,1,2,0,0,1,1", + "v4:16,256,4,16,64,16,16,1,2,0,8,16,1,1", + "v4:256,128,8,128,64,16,16,1,1,2,0,0,1,1", + "v4:256,16,8,256,16,16,16,1,2,2,0,0,1,1", + "v4:256,256,4,64,256,16,16,1,1,2,0,0,1,1", + "v4:64,128,4,64,128,16,16,1,1,2,0,0,1,1", + "v4:64,256,2,64,256,16,16,1,1,2,0,0,1,1" }; // END_CONV_Wmma_i8_gfx1201_DEFS @@ -2201,22 +2452,22 @@ static const StringRef initParametersI8ConvGfx1100[nInitParametersI8ConvGfx1100] // END_CONV_Wmma_i8_gfx1100_DECS // BEGIN_GEMM_Wmma_f16_gfx1201_DECS -static constexpr size_t nInitParametersF16GemmGfx1201 = 14; +static constexpr size_t nInitParametersF16GemmGfx1201 = 65; static const StringRef initParametersF16GemmGfx1201[nInitParametersF16GemmGfx1201]; // END_GEMM_Wmma_f16_gfx1201_DECS // BEGIN_GEMM_Wmma_i8_gfx1201_DECS -static constexpr size_t nInitParametersI8GemmGfx1201 = 13; +static constexpr size_t nInitParametersI8GemmGfx1201 = 59; static const StringRef initParametersI8GemmGfx1201[nInitParametersI8GemmGfx1201]; // END_GEMM_Wmma_i8_gfx1201_DECS // BEGIN_CONV_Wmma_f16_gfx1201_DECS -static constexpr size_t nInitParametersF16ConvGfx1201 = 36; +static constexpr size_t nInitParametersF16ConvGfx1201 = 141; static const StringRef initParametersF16ConvGfx1201[nInitParametersF16ConvGfx1201]; // END_CONV_Wmma_f16_gfx1201_DECS // BEGIN_CONV_Wmma_i8_gfx1201_DECS -static constexpr size_t nInitParametersI8ConvGfx1201 = 12; +static constexpr size_t nInitParametersI8ConvGfx1201 = 62; static const StringRef initParametersI8ConvGfx1201[nInitParametersI8ConvGfx1201]; // END_CONV_Wmma_i8_gfx1201_DECS From c3100f987f404633f63aea5b9677a40ef683ec1d Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Fri, 23 Jan 2026 00:39:27 +0000 Subject: [PATCH 06/10] Update gfx1101 quick-tune lists for gemm and conv, and delete old gfx1100 lists. --- .../Rock/Tuning/QuickTuningPerfconfigs.inc | 506 +++++++++++------- 1 file changed, 322 insertions(+), 184 deletions(-) diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc index 626f0514ffc9..ccee1e024771 100644 --- a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc +++ b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc @@ -23,24 +23,6 @@ const StringRef PopulateParams::initParametersF32GemmGfx1000[] = { }; // END_GEMM_NonAccel_f32_gfx1000_DEFS -// BEGIN_GEMM_NonAccel_f32_gfx1100_DEFS -const StringRef PopulateParams::initParametersF32GemmGfx1100[] = { - "v3:64,64,32,16,2,2,1,1,2", - "v3:64,64,64,4,2,2,1,1,2", - "v3:128,128,128,16,2,2,1,1,2", - "v3:64,32,128,16,2,2,1,1,2", - "v3:128,128,32,16,4,2,1,1,2", - "v3:64,32,64,8,4,4,1,1,2", - "v3:64,64,128,8,4,2,1,1,2", - "v3:64,32,32,4,2,2,1,1,2", - "v3:128,64,64,4,2,2,1,1,2", - "v3:64,32,32,16,2,4,1,1,2", - "v3:128,32,64,16,2,4,1,1,2", - "v3:256,64,32,16,4,2,1,1,2", - "v3:256,64,128,8,2,2,1,1,2" -}; -// END_GEMM_NonAccel_f32_gfx1100_DEFS - // BEGIN_CONV_NonAccel_f32_gfx1000_DEFS const StringRef PopulateParams::initParametersF32ConvGfx1000[] = { "v3:128,128,128,4,2,4,1,1,2", @@ -70,36 +52,6 @@ const StringRef PopulateParams::initParametersF32ConvGfx1000[] = { }; // END_CONV_NonAccel_f32_gfx1000_DEFS -// BEGIN_CONV_NonAccel_f32_gfx1100_DEFS -const StringRef PopulateParams::initParametersF32ConvGfx1100[] = { - "v3:64,32,32,8,4,4,1,1,2", - "v3:64,32,32,8,2,2,1,1,2", - "v3:128,32,32,4,2,2,1,1,2", - "v3:128,32,64,4,2,2,1,1,2", - "v3:64,32,32,16,2,4,1,1,2", - "v3:64,32,32,4,2,4,1,1,2", - "v3:64,64,32,16,2,2,1,1,2", - "v3:64,64,128,4,2,2,1,1,2", - "v3:128,32,32,16,2,2,1,1,2", - "v3:64,32,64,8,4,2,1,1,2", - "v3:128,32,32,16,2,4,1,1,2", - "v3:64,64,64,16,2,2,1,1,2", - "v3:64,64,64,4,4,2,1,1,2", - "v3:128,32,32,8,2,4,1,1,2", - "v3:128,128,128,8,2,2,1,1,2", - "v3:256,32,32,16,2,2,1,1,2", - "v3:64,128,64,8,2,4,1,1,2", - "v3:64,64,64,16,2,4,1,1,2", - "v3:256,32,32,8,2,2,1,1,2", - "v3:64,64,128,16,4,4,1,1,2", - "v3:128,64,128,16,2,2,1,1,2", - "v3:128,128,32,16,2,4,1,1,2", - "v3:64,32,32,4,2,2,1,1,2", - "v3:64,64,64,4,2,2,1,1,2", - "v3:128,32,32,4,2,4,1,1,2" -}; -// END_CONV_NonAccel_f32_gfx1100_DEFS - // BEGIN_GEMM_NonAccel_f32_gfx1201_DEFS const StringRef PopulateParams::initParametersF32GemmGfx1201[] = { "v3:64,64,128,4,2,4,1,1,2", @@ -271,6 +223,60 @@ const StringRef PopulateParams::initParametersF32ConvGfx1150[] = { }; // END_CONV_NonAccel_f32_gfx1150_DEFS +// BEGIN_GEMM_NonAccel_f32_gfx1101_DEFS +const StringRef PopulateParams::initParametersF32GemmGfx1101[] = { + "v3:128,128,128,8,2,4,1,1,2", + "v3:128,32,32,16,2,2,1,1,2", + "v3:64,128,64,16,2,4,1,1,2", + "v3:64,64,128,4,2,2,1,1,2", + "v3:64,64,128,8,2,4,1,1,2", + "v3:64,64,64,16,4,2,1,1,2", + "v3:64,64,128,16,4,2,1,1,2", + "v3:64,128,64,4,2,2,1,1,2", + "v3:64,32,32,8,2,4,1,1,2", + "v3:128,128,128,8,2,2,1,1,2", + "v3:64,32,64,8,4,4,1,1,2", + "v3:64,32,128,16,4,2,1,1,2", + "v3:128,128,64,8,2,2,1,1,2" +}; +// END_GEMM_NonAccel_f32_gfx1101_DEFS + +// BEGIN_CONV_NonAccel_f32_gfx1101_DEFS +const StringRef PopulateParams::initParametersF32ConvGfx1101[] = { + "v3:64,64,128,4,2,2,1,1,2", + "v3:64,64,64,4,2,2,1,1,2", + "v3:64,64,128,4,2,4,1,1,2", + "v3:64,64,128,8,2,4,1,1,2", + "v3:128,64,64,8,2,4,1,1,2", + "v3:64,64,64,4,4,2,1,1,2", + "v3:64,32,32,8,2,2,1,1,2", + "v3:64,64,128,8,2,2,1,1,2", + "v3:64,64,128,8,4,4,1,1,2", + "v3:128,64,64,8,2,2,1,1,2", + "v3:64,32,64,8,4,2,1,1,2", + "v3:128,128,128,4,2,2,1,1,2", + "v3:64,32,128,8,2,2,1,1,2", + "v3:64,32,128,4,2,2,1,1,2", + "v3:128,32,32,16,2,2,1,1,2", + "v3:64,128,64,8,2,4,1,1,2", + "v3:64,64,64,16,2,4,1,1,2", + "v3:64,128,64,4,4,2,1,1,2", + "v3:64,64,128,16,2,4,1,1,2", + "v3:128,128,128,16,4,4,1,1,2", + "v3:256,128,128,8,4,4,1,1,2", + "v3:64,64,64,8,4,4,1,1,2", + "v3:256,32,32,16,2,2,1,1,2", + "v3:64,64,32,8,2,4,1,1,2", + "v3:64,32,32,16,2,2,1,1,2", + "v3:128,128,128,8,4,2,1,1,2", + "v3:128,128,64,8,2,2,1,1,2", + "v3:128,64,32,16,2,4,1,1,2", + "v3:256,64,128,16,4,2,1,1,2", + "v3:128,128,64,16,2,4,1,1,2", + "v3:256,128,64,4,2,2,1,1,2" +}; +// END_CONV_NonAccel_f32_gfx1101_DEFS + #endif // NonAccel_DEFINITIONS_GEN #ifdef NonAccel_DECLARATIONS_GEN @@ -280,21 +286,11 @@ static constexpr size_t nInitParametersF32GemmGfx1000 = 14; static const StringRef initParametersF32GemmGfx1000[nInitParametersF32GemmGfx1000]; // END_GEMM_NonAccel_f32_gfx1000_DECS -// BEGIN_GEMM_NonAccel_f32_gfx1100_DECS -static constexpr size_t nInitParametersF32GemmGfx1100 = 13; -static const StringRef initParametersF32GemmGfx1100[nInitParametersF32GemmGfx1100]; -// END_GEMM_NonAccel_f32_gfx1100_DECS - // BEGIN_CONV_NonAccel_f32_gfx1000_DECS static constexpr size_t nInitParametersF32ConvGfx1000 = 24; static const StringRef initParametersF32ConvGfx1000[nInitParametersF32ConvGfx1000]; // END_CONV_NonAccel_f32_gfx1000_DECS -// BEGIN_CONV_NonAccel_f32_gfx1100_DECS -static constexpr size_t nInitParametersF32ConvGfx1100 = 25; -static const StringRef initParametersF32ConvGfx1100[nInitParametersF32ConvGfx1100]; -// END_CONV_NonAccel_f32_gfx1100_DECS - // BEGIN_GEMM_NonAccel_f32_gfx1201_DECS static constexpr size_t nInitParametersF32GemmGfx1201 = 12; static const StringRef initParametersF32GemmGfx1201[nInitParametersF32GemmGfx1201]; @@ -325,6 +321,16 @@ static constexpr size_t nInitParametersF32ConvGfx1150 = 42; static const StringRef initParametersF32ConvGfx1150[nInitParametersF32ConvGfx1150]; // END_CONV_NonAccel_f32_gfx1150_DECS +// BEGIN_GEMM_NonAccel_f32_gfx1101_DECS +static constexpr size_t nInitParametersF32GemmGfx1101 = 13; +static const StringRef initParametersF32GemmGfx1101[nInitParametersF32GemmGfx1101]; +// END_GEMM_NonAccel_f32_gfx1101_DECS + +// BEGIN_CONV_NonAccel_f32_gfx1101_DECS +static constexpr size_t nInitParametersF32ConvGfx1101 = 31; +static const StringRef initParametersF32ConvGfx1101[nInitParametersF32ConvGfx1101]; +// END_CONV_NonAccel_f32_gfx1101_DECS + #endif // NonAccel_DECLARATIONS_GEN #ifdef XDL_DEFINITIONS_GEN @@ -1573,32 +1579,6 @@ const StringRef PopulateParamsWmma::initParametersF16GemmGfx1000[] = { }; // END_GEMM_Wmma_f16_gfx1000_DEFS -// BEGIN_GEMM_Wmma_f16_gfx1100_DEFS -const StringRef PopulateParamsWmma::initParametersF16GemmGfx1100[] = { - "v4:128,64,4,128,16,16,8,1,1,2,0,0,1,1", - "v4:128,128,8,64,32,16,8,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,32,8,32,16,16,8,1,1,2,0,0,1,1", - "v4:256,128,4,128,32,16,8,1,1,2,0,0,1,1", - "v4:128,128,2,128,32,16,8,1,2,2,0,0,1,1", - "v4:128,256,8,128,32,16,8,1,1,2,0,0,1,1", - "v4:32,256,8,32,32,16,8,1,1,2,0,0,1,1", - "v4:128,32,2,32,32,16,8,1,2,2,0,0,1,1", - "v4:16,16,4,16,16,16,4,1,1,2,0,0,1,1", - "v4:32,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:128,64,2,32,64,16,8,1,2,2,0,0,1,1", - "v4:32,32,8,32,32,16,16,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:16,32,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,64,4,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,32,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:32,16,8,32,16,16,16,1,1,2,0,0,1,1" -}; -// END_GEMM_Wmma_f16_gfx1100_DEFS - // BEGIN_CONV_Wmma_f16_gfx1000_DEFS const StringRef PopulateParamsWmma::initParametersF16ConvGfx1000[] = { "v4:128,64,8,32,64,16,8,1,1,2,0,0,1,1", @@ -1630,38 +1610,6 @@ const StringRef PopulateParamsWmma::initParametersF16ConvGfx1000[] = { }; // END_CONV_Wmma_f16_gfx1000_DEFS -// BEGIN_CONV_Wmma_f16_gfx1100_DEFS -const StringRef PopulateParamsWmma::initParametersF16ConvGfx1100[] = { - "v4:64,128,8,32,32,16,8,1,1,2,0,0,1,1", - "v4:16,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,64,4,32,32,16,8,1,1,2,0,0,1,1", - "v4:256,64,8,32,32,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,128,4,64,32,16,8,1,1,2,0,0,1,1", - "v4:32,32,8,32,16,16,8,1,2,2,0,0,1,1", - "v4:32,16,4,16,16,16,16,1,1,2,0,0,1,1", - "v4:64,32,4,64,16,16,8,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,32,4,32,16,16,8,1,1,2,0,0,1,1", - "v4:128,64,2,32,64,16,8,1,2,2,0,0,1,1", - "v4:16,32,4,16,32,16,16,1,1,2,0,0,1,1", - "v4:64,64,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:128,256,2,32,64,16,8,1,1,2,0,0,1,1", - "v4:128,256,2,128,64,16,8,1,1,2,0,0,1,1", - "v4:128,128,8,16,64,16,8,1,1,2,0,0,1,1", - "v4:16,256,4,16,32,16,8,1,1,2,0,0,1,1", - "v4:128,128,2,64,32,16,16,1,1,2,0,0,1,1", - "v4:256,64,2,64,32,16,8,1,2,2,0,0,1,1", - "v4:32,128,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,16,4,16,16,16,4,1,1,2,0,0,1,1", - "v4:64,256,2,32,64,16,8,1,2,2,0,0,1,1", - "v4:32,128,4,32,32,16,4,1,1,2,0,0,1,1", - "v4:32,256,2,32,64,16,8,1,2,2,0,0,1,1", - "v4:64,256,2,32,64,16,8,1,1,2,0,0,1,1" -}; -// END_CONV_Wmma_f16_gfx1100_DEFS - // BEGIN_GEMM_Wmma_fp8_gfx1000_DEFS const StringRef PopulateParamsWmma::initParametersFp8GemmGfx1000[] = { "v4:128,128,4,32,64,16,16,1,1,2,0,0,1,1", @@ -1731,20 +1679,6 @@ const StringRef PopulateParamsWmma::initParametersI8GemmGfx1000[] = { }; // END_GEMM_Wmma_i8_gfx1000_DEFS -// BEGIN_GEMM_Wmma_i8_gfx1100_DEFS -const StringRef PopulateParamsWmma::initParametersI8GemmGfx1100[] = { - "v4:64,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:64,32,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:128,32,4,64,16,16,16,1,1,2,0,0,1,1", - "v4:256,64,4,128,32,16,16,1,1,2,0,0,1,1", - "v4:128,32,2,32,32,16,16,1,1,2,0,0,1,1", - "v4:64,16,4,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,16,4,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,16,4,16,16,16,4,1,1,2,0,0,1,1", - "v4:128,64,8,16,16,16,4,1,2,2,0,0,1,1" -}; -// END_GEMM_Wmma_i8_gfx1100_DEFS - // BEGIN_CONV_Wmma_i8_gfx1000_DEFS const StringRef PopulateParamsWmma::initParametersI8ConvGfx1000[] = { "v4:128,64,8,32,64,16,16,1,1,2,0,0,1,1", @@ -1761,24 +1695,6 @@ const StringRef PopulateParamsWmma::initParametersI8ConvGfx1000[] = { }; // END_CONV_Wmma_i8_gfx1000_DEFS -// BEGIN_CONV_Wmma_i8_gfx1100_DEFS -const StringRef PopulateParamsWmma::initParametersI8ConvGfx1100[] = { - "v4:64,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:16,64,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:128,16,8,32,16,16,16,1,1,2,0,0,1,1", - "v4:256,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:128,64,8,32,16,16,8,1,2,2,0,0,1,1", - "v4:64,128,8,16,32,16,16,1,2,2,0,0,1,1", - "v4:256,128,4,128,16,16,8,1,2,2,0,0,1,1", - "v4:256,64,8,16,64,16,8,1,1,2,0,0,1,1", - "v4:16,16,4,16,16,16,4,1,1,2,0,0,1,1", - "v4:32,128,2,32,32,16,8,1,1,2,0,0,1,1", - "v4:32,32,2,32,32,16,8,1,1,2,0,0,1,1", - "v4:128,128,4,128,128,16,4,1,2,2,0,0,1,1", - "v4:128,128,4,128,64,16,4,1,2,2,0,0,1,1" -}; -// END_CONV_Wmma_i8_gfx1100_DEFS - // BEGIN_GEMM_Wmma_f16_gfx1201_DEFS const StringRef PopulateParamsWmma::initParametersF16GemmGfx1201[] = { "v4:128,128,2,32,128,16,8,1,1,2,0,0,1,1", @@ -2397,6 +2313,228 @@ const StringRef PopulateParamsWmma::initParametersI8ConvGfx1150[] = { }; // END_CONV_Wmma_i8_gfx1150_DEFS +// BEGIN_GEMM_Wmma_f16_gfx1101_DEFS +const StringRef PopulateParamsWmma::initParametersF16GemmGfx1101[] = { + "v4:128,64,4,64,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,4,64,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,4,32,64,16,8,1,1,2,0,0,1,1", + "v4:32,32,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:16,64,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:128,128,4,64,64,16,8,1,1,2,0,0,1,1", + "v4:32,64,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:16,96,8,16,48,16,8,1,1,2,0,0,1,1", + "v4:192,256,8,96,32,16,8,1,1,2,0,0,1,1", + "v4:256,128,8,32,128,16,8,1,1,2,0,0,1,1", + "v4:32,256,2,32,64,16,8,1,1,2,0,0,1,1", + "v4:128,32,4,32,32,16,8,1,1,2,0,0,1,1", + "v4:64,256,2,64,64,16,8,1,1,2,0,0,1,1", + "v4:32,128,2,32,64,16,8,1,2,2,0,0,1,1", + "v4:16,256,4,16,64,16,8,1,2,2,0,0,1,1", + "v4:128,16,8,64,16,16,8,1,1,2,0,0,1,1", + "v4:64,256,2,32,128,16,8,1,2,2,0,0,1,1", + "v4:16,128,8,16,32,16,16,1,1,2,0,0,1,1", + "v4:96,128,2,96,32,16,8,1,2,2,0,0,1,1", + "v4:224,64,4,112,32,16,8,1,1,0,1,8,1,1", + "v4:96,48,8,48,48,16,8,1,1,0,16,8,1,1", + "v4:128,128,8,64,64,16,8,1,1,0,8,32,1,1", + "v4:128,64,2,32,64,16,16,1,2,2,0,0,1,1", + "v4:256,32,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:64,256,4,16,256,16,16,1,1,2,0,0,1,1", + "v4:128,128,8,64,64,16,8,1,1,0,16,0,1,1", + "v4:16,80,8,16,80,16,16,1,1,2,0,0,1,1", + "v4:192,64,8,48,32,16,8,1,1,1,4,0,1,1", + "v4:224,256,8,112,32,16,8,1,1,2,0,0,1,1", + "v4:224,64,8,112,32,16,8,1,1,1,8,64,1,1", + "v4:256,256,8,32,64,16,8,1,1,2,0,0,1,1", + "v4:256,64,8,64,64,16,4,1,1,2,0,0,1,1", + "v4:48,96,8,48,48,16,4,1,1,0,16,0,1,1", + "v4:64,128,8,32,128,16,8,1,1,2,0,0,1,1" +}; +// END_GEMM_Wmma_f16_gfx1101_DEFS + +// BEGIN_GEMM_Wmma_i8_gfx1101_DEFS +const StringRef PopulateParamsWmma::initParametersI8GemmGfx1101[] = { + "v4:48,64,4,48,16,16,16,1,1,2,0,0,1,1", + "v4:64,64,8,16,32,16,16,1,2,2,0,0,1,1", + "v4:128,64,4,32,64,16,16,1,1,2,0,0,1,1", + "v4:32,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:16,128,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,64,8,32,32,16,16,1,1,2,0,0,1,1", + "v4:128,128,2,32,64,16,16,1,1,2,0,0,1,1", + "v4:128,96,2,32,48,16,16,1,1,2,0,0,1,1", + "v4:64,128,2,64,64,16,16,1,1,2,0,0,1,1", + "v4:32,128,4,32,64,16,16,1,1,2,0,0,1,1", + "v4:144,64,4,144,16,16,8,1,1,2,0,0,1,1", + "v4:64,128,2,32,128,16,16,1,2,2,0,0,1,1", + "v4:64,256,8,16,128,16,8,1,2,2,0,0,1,1", + "v4:192,128,4,192,16,16,16,1,1,2,0,0,1,1", + "v4:256,32,8,128,16,16,16,1,1,2,0,0,1,1", + "v4:64,64,8,64,32,16,16,1,1,2,0,0,1,1", + "v4:256,32,4,32,32,16,16,1,1,0,16,16,1,1", + "v4:64,128,4,32,128,16,16,1,1,0,4,4,1,1" +}; +// END_GEMM_Wmma_i8_gfx1101_DEFS + +// BEGIN_CONV_Wmma_f16_gfx1101_DEFS +const StringRef PopulateParamsWmma::initParametersF16ConvGfx1101[] = { + "v4:64,128,4,64,32,16,8,1,1,2,0,0,1,1", + "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:64,128,2,64,32,16,8,1,1,2,0,0,1,1", + "v4:32,64,4,32,32,16,8,1,1,2,0,0,1,1", + "v4:64,64,8,32,32,16,8,1,1,2,0,0,1,1", + "v4:128,64,4,128,16,16,8,1,1,2,0,0,1,1", + "v4:48,64,8,48,16,16,8,1,1,2,0,0,1,1", + "v4:128,32,8,16,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,2,64,32,16,8,1,2,2,0,0,1,1", + "v4:64,128,2,64,64,16,8,1,1,2,0,0,1,1", + "v4:16,16,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:128,128,2,128,32,16,8,1,1,2,0,0,1,1", + "v4:16,64,4,16,64,16,8,1,1,2,0,0,1,1", + "v4:32,64,8,32,16,16,8,1,1,2,0,0,1,1", + "v4:128,128,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:16,32,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:64,32,2,32,32,16,8,1,2,2,0,0,1,1", + "v4:96,64,4,48,16,16,8,1,2,2,0,0,1,1", + "v4:128,64,4,32,64,16,8,1,2,2,0,0,1,1", + "v4:128,64,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:64,64,8,16,64,16,8,1,2,2,0,0,1,1", + "v4:224,64,4,112,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,4,16,128,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,32,64,16,8,1,1,2,0,0,1,1", + "v4:128,128,4,128,32,16,8,1,1,2,0,0,1,1", + "v4:192,64,4,48,32,16,8,1,2,2,0,0,1,1", + "v4:64,16,8,16,16,16,8,1,1,0,16,32,1,1", + "v4:128,128,2,64,64,16,8,1,1,2,0,0,1,1", + "v4:80,64,8,80,16,16,8,1,1,2,0,0,1,1", + "v4:256,64,4,128,16,16,8,1,1,2,0,0,1,1", + "v4:32,64,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,256,8,16,128,16,8,1,1,2,0,0,1,1", + "v4:32,32,8,32,32,16,4,1,2,2,0,0,1,1", + "v4:16,32,4,16,32,16,16,1,2,2,0,0,1,1", + "v4:32,128,4,32,128,16,4,1,1,2,0,0,1,1", + "v4:128,32,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:256,64,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:16,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:256,48,8,16,48,16,8,1,1,2,0,0,1,1", + "v4:192,96,2,48,96,16,8,1,1,2,0,0,1,1", + "v4:64,256,2,32,128,16,8,1,2,2,0,0,1,1", + "v4:64,256,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:96,32,8,48,32,16,8,1,1,0,16,32,1,1", + "v4:128,64,4,32,16,16,8,1,1,2,0,0,1,1", + "v4:32,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,32,8,32,16,16,8,1,1,0,16,32,1,1", + "v4:48,64,8,48,16,16,8,1,1,0,16,32,1,1", + "v4:128,32,4,32,32,16,16,1,2,2,0,0,1,1", + "v4:192,64,8,48,32,16,8,1,1,0,16,0,1,1", + "v4:192,64,8,48,32,16,8,1,1,0,16,16,1,1", + "v4:256,128,4,128,32,16,8,1,1,2,0,0,1,1", + "v4:128,128,2,128,32,16,8,1,1,0,8,32,1,1", + "v4:128,128,8,64,32,16,8,1,1,0,4,0,1,1", + "v4:128,16,4,64,16,16,8,1,2,2,0,0,1,1", + "v4:256,128,8,32,128,16,8,1,1,0,16,64,1,1", + "v4:32,80,8,16,80,16,4,1,2,2,0,0,1,1", + "v4:64,16,8,16,16,16,8,1,2,1,16,4,1,1", + "v4:80,64,8,80,16,16,8,1,1,0,16,8,1,1", + "v4:128,128,8,128,16,16,8,1,1,0,16,16,1,1", + "v4:128,128,8,128,16,16,8,1,1,0,16,32,1,1", + "v4:128,16,8,32,16,16,8,1,1,0,16,8,1,1", + "v4:128,256,8,128,32,16,8,1,1,2,0,0,1,1", + "v4:128,256,8,64,64,16,8,1,1,2,0,0,1,1", + "v4:128,32,8,16,32,16,8,1,1,0,16,32,1,1", + "v4:144,64,8,144,16,16,8,1,1,2,0,0,1,1", + "v4:192,64,8,48,32,16,8,1,1,0,16,64,1,1", + "v4:128,128,2,128,32,16,8,1,1,0,2,64,1,1", + "v4:128,128,2,128,32,16,8,1,1,0,4,64,1,1", + "v4:128,128,8,128,16,16,8,1,1,1,16,0,1,1", + "v4:128,64,4,128,16,16,16,1,1,2,0,0,1,1", + "v4:128,64,8,64,32,16,4,1,2,2,0,0,1,1", + "v4:16,160,4,16,160,16,8,1,1,2,0,0,1,1", + "v4:16,64,4,16,64,16,8,1,1,0,16,64,1,1", + "v4:256,128,4,64,64,16,16,1,1,2,0,0,1,1", + "v4:256,128,8,128,32,16,8,1,1,2,0,0,1,1", + "v4:256,128,8,32,128,16,8,1,1,0,16,0,1,1", + "v4:64,128,4,64,32,16,16,1,1,2,0,0,1,1", + "v4:64,64,8,64,32,16,8,1,1,0,8,16,1,1", + "v4:96,128,4,96,32,16,8,1,1,0,16,32,1,1", + "v4:96,32,2,96,32,16,8,1,1,2,0,0,1,1", + "v4:128,128,8,64,64,16,8,1,1,0,4,0,1,1", + "v4:128,128,8,64,64,16,8,1,1,1,0,16,1,1", + "v4:128,128,8,64,64,16,8,1,1,1,2,8,1,1", + "v4:128,32,8,32,32,16,8,1,1,1,16,16,1,1", + "v4:128,64,4,128,32,16,16,1,1,2,0,0,1,1", + "v4:16,128,8,16,128,16,4,1,1,2,0,0,1,1", + "v4:16,64,4,16,64,16,8,1,1,0,16,8,1,1", + "v4:160,128,8,80,32,16,8,1,1,0,16,0,1,1", + "v4:192,64,4,96,32,16,8,1,1,2,0,0,1,1", + "v4:256,256,8,128,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,4,16,128,16,8,1,1,0,16,0,1,1", + "v4:64,128,8,32,32,16,8,1,1,0,8,64,1,1", + "v4:64,16,8,16,16,16,16,1,1,0,16,8,1,1", + "v4:64,256,4,64,64,16,4,1,1,0,16,64,1,1", + "v4:64,64,4,64,64,16,4,1,1,0,16,16,1,1", + "v4:96,64,8,48,32,16,8,1,1,0,16,64,1,1", + "v4:112,64,8,112,16,16,8,1,1,0,16,32,1,1", + "v4:128,128,2,128,32,16,16,1,1,0,16,16,1,1", + "v4:128,128,2,128,32,16,8,1,2,0,2,32,1,1", + "v4:128,128,4,32,64,16,8,1,1,1,8,4,1,1", + "v4:128,128,8,64,64,16,8,1,1,0,2,8,1,1", + "v4:128,128,8,64,64,16,8,1,1,1,0,32,1,1", + "v4:128,128,8,64,64,16,8,1,1,1,4,32,1,1", + "v4:128,64,2,64,64,16,16,1,1,0,8,16,1,1", + "v4:128,64,8,64,64,16,4,1,2,2,0,0,1,1", + "v4:128,96,8,32,96,16,8,1,1,0,16,4,1,1", + "v4:16,224,4,16,224,16,4,1,1,2,0,0,1,1", + "v4:16,64,4,16,64,16,8,1,1,0,0,0,1,1", + "v4:160,128,8,80,32,16,8,1,1,0,16,64,1,1", + "v4:160,128,8,80,32,16,8,1,1,0,4,64,1,1", + "v4:192,64,4,96,32,16,16,1,1,0,16,8,1,1", + "v4:256,128,4,64,64,16,16,1,1,0,2,64,1,1", + "v4:256,128,4,64,64,16,16,1,1,0,8,16,1,1", + "v4:256,128,8,32,128,16,8,1,1,1,16,8,1,1", + "v4:32,256,4,32,128,16,8,1,2,2,0,0,1,1", + "v4:32,64,4,32,16,16,4,1,1,0,8,32,1,1", + "v4:48,48,2,48,48,16,8,1,1,1,0,64,1,1", + "v4:64,112,8,16,112,16,8,1,1,0,16,32,1,1", + "v4:64,128,2,64,64,16,8,1,1,0,16,0,1,1", + "v4:64,128,4,16,64,16,16,1,2,2,0,0,1,1", + "v4:64,128,4,64,16,16,16,1,2,2,0,0,1,1", + "v4:64,128,8,32,64,16,16,1,1,2,0,0,1,1", + "v4:64,256,4,16,256,16,16,1,1,2,0,0,1,1", + "v4:64,256,8,64,32,16,8,1,1,0,1,32,1,1", + "v4:64,48,4,16,48,16,8,1,2,1,16,4,1,1", + "v4:64,96,4,32,48,16,8,1,1,0,16,64,1,1", + "v4:96,192,2,48,48,16,8,1,1,0,1,64,1,1" +}; +// END_CONV_Wmma_f16_gfx1101_DEFS + +// BEGIN_CONV_Wmma_i8_gfx1101_DEFS +const StringRef PopulateParamsWmma::initParametersI8ConvGfx1101[] = { + "v4:256,32,4,64,32,16,16,1,1,2,0,0,1,1", + "v4:64,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:128,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:128,32,8,32,16,16,16,1,1,2,0,0,1,1", + "v4:128,64,4,32,64,16,16,1,1,2,0,0,1,1", + "v4:64,128,2,64,32,16,8,1,2,2,0,0,1,1", + "v4:64,128,4,64,64,16,4,1,1,2,0,0,1,1", + "v4:96,64,4,48,16,16,16,1,1,2,0,0,1,1", + "v4:256,64,4,128,32,16,16,1,1,2,0,0,1,1", + "v4:128,128,4,64,64,16,4,1,1,2,0,0,1,1", + "v4:128,128,4,64,64,16,16,1,1,2,0,0,1,1", + "v4:32,64,4,32,64,16,8,1,1,2,0,0,1,1", + "v4:128,128,8,64,64,16,8,1,2,2,0,0,1,1", + "v4:128,256,4,64,16,16,8,1,2,2,0,0,1,1", + "v4:256,64,2,256,32,16,16,1,2,2,0,0,1,1", + "v4:32,256,4,16,256,16,16,1,2,2,0,0,1,1", + "v4:64,256,4,32,128,16,16,1,1,2,0,0,1,1", + "v4:16,128,8,16,128,16,16,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,4,1,2,1,8,4,1,1", + "v4:256,128,8,128,128,16,8,1,1,2,0,0,1,1", + "v4:32,64,8,32,64,16,16,1,2,2,0,0,1,1" +}; +// END_CONV_Wmma_i8_gfx1101_DEFS + #endif // Wmma_DEFINITIONS_GEN #ifdef Wmma_DECLARATIONS_GEN @@ -2406,21 +2544,11 @@ static constexpr size_t nInitParametersF16GemmGfx1000 = 17; static const StringRef initParametersF16GemmGfx1000[nInitParametersF16GemmGfx1000]; // END_GEMM_Wmma_f16_gfx1000_DECS -// BEGIN_GEMM_Wmma_f16_gfx1100_DECS -static constexpr size_t nInitParametersF16GemmGfx1100 = 21; -static const StringRef initParametersF16GemmGfx1100[nInitParametersF16GemmGfx1100]; -// END_GEMM_Wmma_f16_gfx1100_DECS - // BEGIN_CONV_Wmma_f16_gfx1000_DECS static constexpr size_t nInitParametersF16ConvGfx1000 = 26; static const StringRef initParametersF16ConvGfx1000[nInitParametersF16ConvGfx1000]; // END_CONV_Wmma_f16_gfx1000_DECS -// BEGIN_CONV_Wmma_f16_gfx1100_DECS -static constexpr size_t nInitParametersF16ConvGfx1100 = 27; -static const StringRef initParametersF16ConvGfx1100[nInitParametersF16ConvGfx1100]; -// END_CONV_Wmma_f16_gfx1100_DECS - // BEGIN_GEMM_Wmma_fp8_gfx1000_DECS static constexpr size_t nInitParametersFp8GemmGfx1000 = 18; static const StringRef initParametersFp8GemmGfx1000[nInitParametersFp8GemmGfx1000]; @@ -2436,21 +2564,11 @@ static constexpr size_t nInitParametersI8GemmGfx1000 = 15; static const StringRef initParametersI8GemmGfx1000[nInitParametersI8GemmGfx1000]; // END_GEMM_Wmma_i8_gfx1000_DECS -// BEGIN_GEMM_Wmma_i8_gfx1100_DECS -static constexpr size_t nInitParametersI8GemmGfx1100 = 9; -static const StringRef initParametersI8GemmGfx1100[nInitParametersI8GemmGfx1100]; -// END_GEMM_Wmma_i8_gfx1100_DECS - // BEGIN_CONV_Wmma_i8_gfx1000_DECS static constexpr size_t nInitParametersI8ConvGfx1000 = 11; static const StringRef initParametersI8ConvGfx1000[nInitParametersI8ConvGfx1000]; // END_CONV_Wmma_i8_gfx1000_DECS -// BEGIN_CONV_Wmma_i8_gfx1100_DECS -static constexpr size_t nInitParametersI8ConvGfx1100 = 13; -static const StringRef initParametersI8ConvGfx1100[nInitParametersI8ConvGfx1100]; -// END_CONV_Wmma_i8_gfx1100_DECS - // BEGIN_GEMM_Wmma_f16_gfx1201_DECS static constexpr size_t nInitParametersF16GemmGfx1201 = 65; static const StringRef initParametersF16GemmGfx1201[nInitParametersF16GemmGfx1201]; @@ -2511,6 +2629,26 @@ static constexpr size_t nInitParametersI8ConvGfx1150 = 21; static const StringRef initParametersI8ConvGfx1150[nInitParametersI8ConvGfx1150]; // END_CONV_Wmma_i8_gfx1150_DECS +// BEGIN_GEMM_Wmma_f16_gfx1101_DECS +static constexpr size_t nInitParametersF16GemmGfx1101 = 35; +static const StringRef initParametersF16GemmGfx1101[nInitParametersF16GemmGfx1101]; +// END_GEMM_Wmma_f16_gfx1101_DECS + +// BEGIN_GEMM_Wmma_i8_gfx1101_DECS +static constexpr size_t nInitParametersI8GemmGfx1101 = 18; +static const StringRef initParametersI8GemmGfx1101[nInitParametersI8GemmGfx1101]; +// END_GEMM_Wmma_i8_gfx1101_DECS + +// BEGIN_CONV_Wmma_f16_gfx1101_DECS +static constexpr size_t nInitParametersF16ConvGfx1101 = 128; +static const StringRef initParametersF16ConvGfx1101[nInitParametersF16ConvGfx1101]; +// END_CONV_Wmma_f16_gfx1101_DECS + +// BEGIN_CONV_Wmma_i8_gfx1101_DECS +static constexpr size_t nInitParametersI8ConvGfx1101 = 21; +static const StringRef initParametersI8ConvGfx1101[nInitParametersI8ConvGfx1101]; +// END_CONV_Wmma_i8_gfx1101_DECS + #endif // Wmma_DECLARATIONS_GEN #ifdef GemmGemm_DEFINITIONS_GEN @@ -3191,12 +3329,8 @@ static const StringRef initParametersBf16AttentionGfx1150[nInitParametersBf16Att {"gfx1000_gemm_f32", {PopulateParams::initParametersF32GemmGfx1000, PopulateParams::nInitParametersF32GemmGfx1000}}, -{"gfx1100_gemm_f32", {PopulateParams::initParametersF32GemmGfx1100, PopulateParams::nInitParametersF32GemmGfx1100}}, - {"gfx1000_conv_f32", {PopulateParams::initParametersF32ConvGfx1000, PopulateParams::nInitParametersF32ConvGfx1000}}, -{"gfx1100_conv_f32", {PopulateParams::initParametersF32ConvGfx1100, PopulateParams::nInitParametersF32ConvGfx1100}}, - {"gfx1201_gemm_f32", {PopulateParams::initParametersF32GemmGfx1201, PopulateParams::nInitParametersF32GemmGfx1201}}, {"gfx1201_conv_f32", {PopulateParams::initParametersF32ConvGfx1201, PopulateParams::nInitParametersF32ConvGfx1201}}, @@ -3209,6 +3343,10 @@ static const StringRef initParametersBf16AttentionGfx1150[nInitParametersBf16Att {"gfx1150_conv_f32", {PopulateParams::initParametersF32ConvGfx1150, PopulateParams::nInitParametersF32ConvGfx1150}}, +{"gfx1101_gemm_f32", {PopulateParams::initParametersF32GemmGfx1101, PopulateParams::nInitParametersF32GemmGfx1101}}, + +{"gfx1101_conv_f32", {PopulateParams::initParametersF32ConvGfx1101, PopulateParams::nInitParametersF32ConvGfx1101}}, + #endif // NonAccel_LOOKUP_TABLE_GEN #ifdef Accel_LOOKUP_TABLE_GEN @@ -3269,24 +3407,16 @@ static const StringRef initParametersBf16AttentionGfx1150[nInitParametersBf16Att {"gfx1000_gemm_f16", {PopulateParamsWmma::initParametersF16GemmGfx1000, PopulateParamsWmma::nInitParametersF16GemmGfx1000}}, -{"gfx1100_gemm_f16", {PopulateParamsWmma::initParametersF16GemmGfx1100, PopulateParamsWmma::nInitParametersF16GemmGfx1100}}, - {"gfx1000_conv_f16", {PopulateParamsWmma::initParametersF16ConvGfx1000, PopulateParamsWmma::nInitParametersF16ConvGfx1000}}, -{"gfx1100_conv_f16", {PopulateParamsWmma::initParametersF16ConvGfx1100, PopulateParamsWmma::nInitParametersF16ConvGfx1100}}, - {"gfx1000_gemm_fp8", {PopulateParamsWmma::initParametersFp8GemmGfx1000, PopulateParamsWmma::nInitParametersFp8GemmGfx1000}}, {"gfx1000_conv_fp8", {PopulateParamsWmma::initParametersFp8ConvGfx1000, PopulateParamsWmma::nInitParametersFp8ConvGfx1000}}, {"gfx1000_gemm_i8", {PopulateParamsWmma::initParametersI8GemmGfx1000, PopulateParamsWmma::nInitParametersI8GemmGfx1000}}, -{"gfx1100_gemm_i8", {PopulateParamsWmma::initParametersI8GemmGfx1100, PopulateParamsWmma::nInitParametersI8GemmGfx1100}}, - {"gfx1000_conv_i8", {PopulateParamsWmma::initParametersI8ConvGfx1000, PopulateParamsWmma::nInitParametersI8ConvGfx1000}}, -{"gfx1100_conv_i8", {PopulateParamsWmma::initParametersI8ConvGfx1100, PopulateParamsWmma::nInitParametersI8ConvGfx1100}}, - {"gfx1201_gemm_f16", {PopulateParamsWmma::initParametersF16GemmGfx1201, PopulateParamsWmma::nInitParametersF16GemmGfx1201}}, {"gfx1201_gemm_i8", {PopulateParamsWmma::initParametersI8GemmGfx1201, PopulateParamsWmma::nInitParametersI8GemmGfx1201}}, @@ -3329,12 +3459,8 @@ static const StringRef initParametersBf16AttentionGfx1150[nInitParametersBf16Att {"gfx1000_gemm_bf16", {PopulateParamsWmma::initParametersF16GemmGfx1000, PopulateParamsWmma::nInitParametersF16GemmGfx1000}}, // alias -> f16 -{"gfx1100_gemm_bf16", {PopulateParamsWmma::initParametersF16GemmGfx1100, PopulateParamsWmma::nInitParametersF16GemmGfx1100}}, // alias -> f16 - {"gfx1000_conv_bf16", {PopulateParamsWmma::initParametersF16ConvGfx1000, PopulateParamsWmma::nInitParametersF16ConvGfx1000}}, // alias -> f16 -{"gfx1100_conv_bf16", {PopulateParamsWmma::initParametersF16ConvGfx1100, PopulateParamsWmma::nInitParametersF16ConvGfx1100}}, // alias -> f16 - {"gfx1201_gemm_bf16", {PopulateParamsWmma::initParametersF16GemmGfx1201, PopulateParamsWmma::nInitParametersF16GemmGfx1201}}, // alias -> f16 {"gfx1201_conv_bf16", {PopulateParamsWmma::initParametersF16ConvGfx1201, PopulateParamsWmma::nInitParametersF16ConvGfx1201}}, // alias -> f16 @@ -3347,6 +3473,18 @@ static const StringRef initParametersBf16AttentionGfx1150[nInitParametersBf16Att {"gfx1150_conv_bf16", {PopulateParamsWmma::initParametersF16ConvGfx1150, PopulateParamsWmma::nInitParametersF16ConvGfx1150}}, // alias -> f16 +{"gfx1101_gemm_f16", {PopulateParamsWmma::initParametersF16GemmGfx1101, PopulateParamsWmma::nInitParametersF16GemmGfx1101}}, + +{"gfx1101_gemm_i8", {PopulateParamsWmma::initParametersI8GemmGfx1101, PopulateParamsWmma::nInitParametersI8GemmGfx1101}}, + +{"gfx1101_conv_f16", {PopulateParamsWmma::initParametersF16ConvGfx1101, PopulateParamsWmma::nInitParametersF16ConvGfx1101}}, + +{"gfx1101_conv_i8", {PopulateParamsWmma::initParametersI8ConvGfx1101, PopulateParamsWmma::nInitParametersI8ConvGfx1101}}, + +{"gfx1101_gemm_bf16", {PopulateParamsWmma::initParametersF16GemmGfx1101, PopulateParamsWmma::nInitParametersF16GemmGfx1101}}, // alias -> f16 + +{"gfx1101_conv_bf16", {PopulateParamsWmma::initParametersF16ConvGfx1101, PopulateParamsWmma::nInitParametersF16ConvGfx1101}}, // alias -> f16 + #endif // Accel_LOOKUP_TABLE_GEN #ifdef GemmGemm_LOOKUP_TABLE_GEN From 1c8a1f8ca2f15af6581d2ea75ab568904d91d20e Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Fri, 23 Jan 2026 10:34:15 +0000 Subject: [PATCH 07/10] Update gfx1150 quick-tune lists for gemm. --- .../Rock/Tuning/QuickTuningPerfconfigs.inc | 93 ++++++++++--------- 1 file changed, 50 insertions(+), 43 deletions(-) diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc index ccee1e024771..d09aa582a266 100644 --- a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc +++ b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc @@ -160,19 +160,20 @@ const StringRef PopulateParams::initParametersF32ConvGfx1151[] = { // BEGIN_GEMM_NonAccel_f32_gfx1150_DEFS const StringRef PopulateParams::initParametersF32GemmGfx1150[] = { + "v3:64,64,128,16,2,4,1,1,2", + "v3:64,128,32,16,2,2,1,1,2", "v3:64,64,128,4,2,2,1,1,2", - "v3:128,128,128,16,4,2,1,1,2", - "v3:64,64,128,16,2,2,1,1,2", - "v3:64,128,64,16,2,2,1,1,2", - "v3:64,64,64,8,2,2,1,1,2", - "v3:64,128,32,16,2,4,1,1,2", - "v3:256,128,128,8,4,4,1,1,2", "v3:128,32,32,16,2,4,1,1,2", - "v3:64,32,64,8,4,4,1,1,2", - "v3:128,64,128,4,2,2,1,1,2", - "v3:256,64,128,16,4,4,1,1,2", - "v3:128,64,64,4,4,2,1,1,2", - "v3:64,32,32,16,4,2,1,1,2" + "v3:64,64,64,16,2,2,1,1,2", + "v3:128,128,128,8,2,4,1,1,2", + "v3:64,64,128,4,2,4,1,1,2", + "v3:64,64,128,4,4,4,1,1,2", + "v3:64,64,64,4,2,2,1,1,2", + "v3:64,128,64,8,2,2,1,1,2", + "v3:256,32,64,16,2,4,1,1,2", + "v3:64,128,64,4,2,2,1,1,2", + "v3:64,64,32,16,2,2,1,1,2", + "v3:256,128,128,8,4,4,1,1,2" }; // END_GEMM_NonAccel_f32_gfx1150_DEFS @@ -312,7 +313,7 @@ static const StringRef initParametersF32ConvGfx1151[nInitParametersF32ConvGfx115 // END_CONV_NonAccel_f32_gfx1151_DECS // BEGIN_GEMM_NonAccel_f32_gfx1150_DECS -static constexpr size_t nInitParametersF32GemmGfx1150 = 13; +static constexpr size_t nInitParametersF32GemmGfx1150 = 14; static const StringRef initParametersF32GemmGfx1150[nInitParametersF32GemmGfx1150]; // END_GEMM_NonAccel_f32_gfx1150_DECS @@ -2180,46 +2181,52 @@ const StringRef PopulateParamsWmma::initParametersI8ConvGfx1151[] = { const StringRef PopulateParamsWmma::initParametersF16GemmGfx1150[] = { "v4:64,128,8,32,64,16,8,1,1,2,0,0,1,1", "v4:128,256,8,64,64,16,8,1,1,2,0,0,1,1", - "v4:128,256,4,128,32,16,8,1,1,2,0,0,1,1", + "v4:128,64,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:64,256,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:32,256,4,16,128,16,8,1,1,2,0,0,1,1", "v4:64,128,4,32,64,16,8,1,1,2,0,0,1,1", - "v4:64,64,4,64,64,16,8,1,1,2,0,0,1,1", - "v4:64,256,8,16,128,16,8,1,1,2,0,0,1,1", - "v4:16,16,8,16,16,16,16,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:128,64,8,32,32,16,8,1,1,2,0,0,1,1", - "v4:128,256,2,128,32,16,8,1,1,2,0,0,1,1", - "v4:64,64,8,32,64,16,8,1,1,2,0,0,1,1", "v4:32,16,8,16,16,16,16,1,2,2,0,0,1,1", - "v4:32,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:256,128,4,128,64,16,4,1,1,2,0,0,1,1", - "v4:32,64,4,32,64,16,16,1,1,2,0,0,1,1", - "v4:256,128,4,64,64,16,16,1,1,2,0,0,1,1", - "v4:16,16,4,16,16,16,4,1,2,2,0,0,1,1", - "v4:128,64,4,32,32,16,4,1,1,2,0,0,1,1", - "v4:64,32,4,16,32,16,4,1,1,2,0,0,1,1", + "v4:64,32,4,64,32,16,8,1,1,2,0,0,1,1", + "v4:64,32,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:32,64,8,32,32,16,8,1,2,2,0,0,1,1", + "v4:128,128,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:128,128,2,128,32,16,8,1,1,2,0,0,1,1", + "v4:256,128,2,128,64,16,8,1,1,2,0,0,1,1", + "v4:128,256,4,128,32,16,16,1,1,2,0,0,1,1", + "v4:256,256,2,128,32,16,8,1,2,2,0,0,1,1", + "v4:256,256,2,256,32,16,8,1,1,2,0,0,1,1", "v4:128,128,8,64,32,16,16,1,1,2,0,0,1,1", - "v4:256,64,8,128,32,16,8,1,1,2,0,0,1,1" + "v4:256,128,4,128,32,16,16,1,1,2,0,0,1,1", + "v4:16,128,8,16,128,16,16,1,2,2,0,0,1,1", + "v4:256,64,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:16,128,8,16,128,16,16,1,1,2,0,0,1,1" }; // END_GEMM_Wmma_f16_gfx1150_DEFS // BEGIN_GEMM_Wmma_i8_gfx1150_DEFS const StringRef PopulateParamsWmma::initParametersI8GemmGfx1150[] = { + "v4:64,128,4,32,64,16,16,1,1,2,0,0,1,1", "v4:128,256,4,128,32,16,16,1,1,2,0,0,1,1", - "v4:128,64,2,64,32,16,16,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,16,1,2,2,0,0,1,1", - "v4:128,64,8,128,16,16,16,1,1,2,0,0,1,1", - "v4:128,128,4,128,32,16,16,1,1,2,0,0,1,1", - "v4:32,256,2,32,64,16,16,1,1,2,0,0,1,1", - "v4:32,64,4,32,16,16,16,1,2,2,0,0,1,1", - "v4:64,128,2,64,32,16,16,1,2,2,0,0,1,1", - "v4:256,256,4,128,32,16,16,1,1,2,0,0,1,1", - "v4:32,64,8,16,64,16,16,1,1,2,0,0,1,1", - "v4:128,128,8,32,64,16,16,1,1,2,0,0,1,1", + "v4:128,128,4,64,64,16,16,1,1,2,0,0,1,1", + "v4:64,128,8,32,64,16,16,1,1,2,0,0,1,1", + "v4:256,64,4,256,16,16,16,1,1,2,0,0,1,1", + "v4:128,64,2,128,32,16,16,1,1,2,0,0,1,1", + "v4:256,64,8,128,16,16,16,1,1,2,0,0,1,1", + "v4:64,16,8,32,16,16,16,1,2,2,0,0,1,1", + "v4:32,64,4,32,64,16,8,1,1,2,0,0,1,1", + "v4:32,64,8,16,64,16,8,1,1,2,0,0,1,1", + "v4:64,256,2,64,64,16,16,1,1,2,0,0,1,1", + "v4:256,256,2,128,32,16,16,1,2,2,0,0,1,1", + "v4:128,64,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:128,64,8,16,32,16,16,1,2,2,0,0,1,1", "v4:64,256,8,16,128,16,16,1,1,2,0,0,1,1", - "v4:64,64,4,64,64,16,16,1,1,2,0,0,1,1", - "v4:256,32,4,128,16,16,16,1,1,2,0,0,1,1", - "v4:16,16,4,16,16,16,4,1,2,2,0,0,1,1", - "v4:256,64,4,128,16,16,16,1,1,2,0,0,1,1" + "v4:16,256,8,16,256,16,16,1,1,2,0,0,1,1", + "v4:16,256,8,16,256,16,16,1,2,2,0,0,1,1", + "v4:256,256,2,128,256,16,8,1,1,2,0,0,1,1", + "v4:256,256,2,256,128,16,16,1,2,2,0,0,1,1", + "v4:256,256,2,256,128,16,8,1,1,2,0,0,1,1", + "v4:256,256,4,128,256,16,16,1,2,2,0,0,1,1", + "v4:256,256,4,256,128,16,16,1,2,2,0,0,1,1" }; // END_GEMM_Wmma_i8_gfx1150_DEFS @@ -2615,7 +2622,7 @@ static const StringRef initParametersF16GemmGfx1150[nInitParametersF16GemmGfx115 // END_GEMM_Wmma_f16_gfx1150_DECS // BEGIN_GEMM_Wmma_i8_gfx1150_DECS -static constexpr size_t nInitParametersI8GemmGfx1150 = 16; +static constexpr size_t nInitParametersI8GemmGfx1150 = 22; static const StringRef initParametersI8GemmGfx1150[nInitParametersI8GemmGfx1150]; // END_GEMM_Wmma_i8_gfx1150_DECS From ceabe5e24d1fe345e4be3f760b2affa0baa29a92 Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Thu, 29 Jan 2026 22:22:36 +0000 Subject: [PATCH 08/10] Update gfx1150 quick-tune lists for conv. --- .../Rock/Tuning/QuickTuningPerfconfigs.inc | 229 +++++++++--------- 1 file changed, 114 insertions(+), 115 deletions(-) diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc index d09aa582a266..7dea75cb8e10 100644 --- a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc +++ b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc @@ -179,48 +179,63 @@ const StringRef PopulateParams::initParametersF32GemmGfx1150[] = { // BEGIN_CONV_NonAccel_f32_gfx1150_DEFS const StringRef PopulateParams::initParametersF32ConvGfx1150[] = { - "v3:64,64,128,16,2,2,1,1,2", - "v3:64,64,128,8,2,2,1,1,2", "v3:64,64,128,4,2,2,1,1,2", - "v3:64,64,128,4,2,4,1,1,2", - "v3:64,64,128,16,4,2,1,1,2", - "v3:64,64,128,8,2,4,1,1,2", - "v3:64,128,64,16,2,2,1,1,2", + "v3:64,64,128,8,2,2,1,1,2", "v3:64,64,128,16,4,4,1,1,2", - "v3:64,64,128,4,4,2,1,1,2", - "v3:128,128,128,16,2,4,1,1,2", - "v3:128,128,128,16,4,2,1,1,2", - "v3:64,64,64,4,2,4,1,1,2", - "v3:64,32,128,4,2,2,1,1,2", - "v3:64,64,64,8,4,2,1,1,2", - "v3:64,64,64,16,2,2,1,1,2", + "v3:64,64,128,16,2,2,1,1,2", "v3:128,128,128,4,2,2,1,1,2", - "v3:64,128,64,16,4,2,1,1,2", - "v3:128,128,128,4,2,4,1,1,2", - "v3:64,32,64,4,2,2,1,1,2", - "v3:64,64,32,16,2,2,1,1,2", + "v3:64,64,128,8,2,4,1,1,2", + "v3:64,128,64,16,2,4,1,1,2", + "v3:64,64,128,8,4,2,1,1,2", + "v3:128,64,128,4,2,2,1,1,2", + "v3:64,128,64,16,4,4,1,1,2", + "v3:64,64,64,16,2,2,1,1,2", + "v3:64,128,64,8,2,2,1,1,2", + "v3:128,64,128,8,2,2,1,1,2", + "v3:64,64,64,8,2,4,1,1,2", + "v3:128,128,128,4,4,2,1,1,2", + "v3:64,64,128,4,4,4,1,1,2", + "v3:128,64,128,8,2,4,1,1,2", + "v3:128,64,128,4,2,4,1,1,2", + "v3:64,32,128,8,2,2,1,1,2", + "v3:64,64,64,4,2,2,1,1,2", + "v3:64,64,64,8,4,2,1,1,2", + "v3:64,64,64,16,4,2,1,1,2", + "v3:128,128,128,16,2,2,1,1,2", + "v3:128,64,128,4,4,2,1,1,2", + "v3:64,64,64,8,4,4,1,1,2", + "v3:64,32,128,4,2,2,1,1,2", + "v3:128,64,128,4,4,4,1,1,2", + "v3:128,128,128,16,4,2,1,1,2", + "v3:128,64,128,8,4,4,1,1,2", + "v3:256,128,128,8,2,2,1,1,2", + "v3:256,64,128,8,2,2,1,1,2", + "v3:64,32,128,16,2,2,1,1,2", + "v3:64,128,64,4,4,4,1,1,2", + "v3:256,64,128,8,4,2,1,1,2", + "v3:256,64,64,16,2,4,1,1,2", + "v3:128,64,64,16,2,2,1,1,2", "v3:64,32,64,16,2,2,1,1,2", + "v3:128,32,32,16,2,2,1,1,2", + "v3:256,32,64,16,2,2,1,1,2", + "v3:128,128,128,8,4,4,1,1,2", + "v3:64,128,32,16,2,4,1,1,2", + "v3:128,128,64,16,2,2,1,1,2", + "v3:256,64,128,16,4,2,1,1,2", + "v3:128,32,64,16,2,4,1,1,2", + "v3:64,128,32,16,4,4,1,1,2", + "v3:128,64,128,16,4,4,1,1,2", "v3:128,128,64,8,4,2,1,1,2", - "v3:64,128,64,4,4,4,1,1,2", - "v3:256,128,128,8,2,2,1,1,2", - "v3:64,128,32,16,4,2,1,1,2", - "v3:64,64,32,8,2,4,1,1,2", - "v3:128,32,32,16,2,4,1,1,2", - "v3:128,64,64,16,2,4,1,1,2", - "v3:128,64,32,16,2,2,1,1,2", - "v3:64,32,32,8,2,2,1,1,2", - "v3:64,128,32,8,4,2,1,1,2", - "v3:256,128,128,4,2,4,1,1,2", + "v3:128,32,128,16,2,2,1,1,2", + "v3:128,64,32,16,4,2,1,1,2", + "v3:256,32,32,16,2,2,1,1,2", + "v3:128,128,32,16,4,2,1,1,2", + "v3:64,64,32,4,4,2,1,1,2", "v3:128,128,32,16,2,4,1,1,2", - "v3:256,128,128,16,4,2,1,1,2", - "v3:64,128,32,8,2,4,1,1,2", - "v3:128,64,64,16,4,2,1,1,2", - "v3:256,64,64,16,2,4,1,1,2", - "v3:128,64,64,8,4,4,1,1,2", - "v3:256,32,128,8,2,2,1,1,2", - "v3:256,32,64,8,2,2,1,1,2", - "v3:256,64,32,16,2,2,1,1,2", - "v3:128,64,32,4,2,2,1,1,2" + "v3:128,128,64,4,4,4,1,1,2", + "v3:64,32,64,4,2,2,1,1,2", + "v3:64,32,64,4,4,4,1,1,2", + "v3:64,32,32,8,4,4,1,1,2" }; // END_CONV_NonAccel_f32_gfx1150_DEFS @@ -318,7 +333,7 @@ static const StringRef initParametersF32GemmGfx1150[nInitParametersF32GemmGfx115 // END_GEMM_NonAccel_f32_gfx1150_DECS // BEGIN_CONV_NonAccel_f32_gfx1150_DECS -static constexpr size_t nInitParametersF32ConvGfx1150 = 42; +static constexpr size_t nInitParametersF32ConvGfx1150 = 57; static const StringRef initParametersF32ConvGfx1150[nInitParametersF32ConvGfx1150]; // END_CONV_NonAccel_f32_gfx1150_DECS @@ -2232,91 +2247,75 @@ const StringRef PopulateParamsWmma::initParametersI8GemmGfx1150[] = { // BEGIN_CONV_Wmma_f16_gfx1150_DEFS const StringRef PopulateParamsWmma::initParametersF16ConvGfx1150[] = { - "v4:64,128,4,64,32,16,8,1,1,2,0,0,1,1", - "v4:128,64,8,128,16,16,8,1,1,2,0,0,1,1", - "v4:128,64,4,64,32,16,8,1,1,2,0,0,1,1", - "v4:64,256,2,64,64,16,8,1,1,2,0,0,1,1", "v4:128,128,4,128,32,16,8,1,1,2,0,0,1,1", - "v4:128,128,8,16,128,16,8,1,1,2,0,0,1,1", - "v4:64,256,4,64,32,16,8,1,1,2,0,0,1,1", - "v4:128,64,8,32,32,16,8,1,1,2,0,0,1,1", - "v4:32,64,4,32,64,16,4,1,1,2,0,0,1,1", - "v4:128,256,4,128,32,16,8,1,1,2,0,0,1,1", - "v4:128,128,2,128,32,16,16,1,1,2,0,0,1,1", - "v4:256,128,8,128,32,16,8,1,1,2,0,0,1,1", - "v4:128,64,2,128,32,16,8,1,1,2,0,0,1,1", - "v4:256,256,8,128,32,16,8,1,1,2,0,0,1,1", - "v4:64,128,8,64,32,16,8,1,1,2,0,0,1,1", - "v4:64,128,8,64,16,16,8,1,1,2,0,0,1,1", - "v4:32,128,4,32,64,16,8,1,1,2,0,0,1,1", - "v4:128,128,8,32,64,16,8,1,1,2,0,0,1,1", - "v4:256,128,8,64,32,16,8,1,1,2,0,0,1,1", - "v4:32,128,2,32,128,16,8,1,1,2,0,0,1,1", - "v4:64,128,2,64,32,16,8,1,2,2,0,0,1,1", - "v4:256,64,4,64,64,16,8,1,1,2,0,0,1,1", - "v4:64,128,4,64,16,16,8,1,2,2,0,0,1,1", - "v4:32,128,2,32,32,16,8,1,2,2,0,0,1,1", - "v4:128,64,4,128,32,16,4,1,1,2,0,0,1,1", - "v4:64,256,4,64,16,16,8,1,2,2,0,0,1,1", - "v4:256,128,8,64,64,16,8,1,1,2,0,0,1,1", - "v4:64,32,4,32,32,16,8,1,1,2,0,0,1,1", - "v4:16,128,4,16,128,16,8,1,1,2,0,0,1,1", - "v4:32,256,2,32,128,16,8,1,1,2,0,0,1,1", - "v4:128,64,2,32,32,16,8,1,1,2,0,0,1,1", - "v4:256,64,8,64,32,16,8,1,1,2,0,0,1,1", - "v4:32,32,4,32,16,16,8,1,2,2,0,0,1,1", - "v4:256,64,2,32,64,16,8,1,1,2,0,0,1,1", - "v4:64,64,8,16,64,16,8,1,2,2,0,0,1,1", "v4:64,32,8,32,32,16,8,1,1,2,0,0,1,1", - "v4:16,32,8,16,32,16,8,1,1,2,0,0,1,1", - "v4:64,64,8,16,32,16,8,1,2,2,0,0,1,1", - "v4:16,32,4,16,32,16,8,1,2,2,0,0,1,1", - "v4:128,256,8,128,32,16,8,1,1,2,0,0,1,1", - "v4:64,128,8,16,128,16,8,1,2,2,0,0,1,1", - "v4:32,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:256,32,4,64,32,16,8,1,1,2,0,0,1,1", - "v4:32,64,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:256,128,8,32,64,16,8,1,1,2,0,0,1,1", - "v4:32,128,8,32,32,16,8,1,2,2,0,0,1,1", - "v4:32,16,4,16,16,16,8,1,1,2,0,0,1,1", + "v4:64,64,2,64,64,16,8,1,1,2,0,0,1,1", + "v4:128,64,4,128,32,16,8,1,1,2,0,0,1,1", + "v4:128,32,8,32,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,32,64,16,8,1,1,2,0,0,1,1", + "v4:128,64,8,32,64,16,8,1,1,2,0,0,1,1", + "v4:128,64,4,64,32,16,16,1,1,2,0,0,1,1", + "v4:128,128,2,128,32,16,16,1,1,2,0,0,1,1", + "v4:128,64,2,128,32,16,16,1,1,2,0,0,1,1", + "v4:64,128,4,64,32,16,8,1,1,2,0,0,1,1", + "v4:128,128,2,64,64,16,8,1,2,2,0,0,1,1", + "v4:32,128,4,32,32,16,8,1,1,2,0,0,1,1", + "v4:256,64,2,64,64,16,16,1,1,2,0,0,1,1", + "v4:32,64,2,32,64,16,8,1,1,2,0,0,1,1", + "v4:32,128,8,16,32,16,8,1,1,2,0,0,1,1", + "v4:256,64,4,128,32,16,8,1,1,2,0,0,1,1", + "v4:64,64,8,32,32,16,4,1,2,2,0,0,1,1", + "v4:16,128,8,16,32,16,8,1,1,2,0,0,1,1", + "v4:256,128,4,128,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,2,32,64,16,8,1,1,2,0,0,1,1", + "v4:256,128,2,64,64,16,8,1,2,2,0,0,1,1", + "v4:128,256,4,64,64,16,8,1,1,2,0,0,1,1", + "v4:128,32,8,128,16,16,8,1,1,2,0,0,1,1", + "v4:64,16,8,32,16,16,8,1,1,2,0,0,1,1", + "v4:16,128,4,16,32,16,8,1,1,2,0,0,1,1", + "v4:64,256,2,64,64,16,16,1,1,2,0,0,1,1", + "v4:128,128,4,128,64,16,4,1,1,2,0,0,1,1", + "v4:32,128,2,32,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,2,64,64,16,16,1,1,2,0,0,1,1", + "v4:32,128,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:256,128,4,256,32,16,4,1,1,2,0,0,1,1", + "v4:128,128,8,64,64,16,4,1,1,2,0,0,1,1", "v4:32,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,64,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,32,4,16,16,16,4,1,1,2,0,0,1,1", - "v4:256,32,8,32,32,16,8,1,1,2,0,0,1,1", - "v4:16,128,8,16,64,16,8,1,1,2,0,0,1,1", - "v4:64,32,8,32,16,16,16,1,1,2,0,0,1,1", - "v4:16,16,4,16,16,16,4,1,1,2,0,0,1,1", - "v4:128,32,4,16,32,16,16,1,1,2,0,0,1,1", - "v4:16,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:128,16,4,32,16,16,8,1,2,2,0,0,1,1", - "v4:64,16,8,32,16,16,16,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,16,1,1,2,0,0,1,1" + "v4:128,128,2,128,64,16,16,1,1,2,0,0,1,1", + "v4:32,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:128,256,8,64,64,16,8,1,1,2,0,0,1,1", + "v4:128,64,8,32,32,16,4,1,1,2,0,0,1,1", + "v4:256,128,8,256,16,16,8,1,1,2,0,0,1,1", + "v4:64,64,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:16,64,8,16,32,16,8,1,2,2,0,0,1,1", + "v4:256,64,4,128,32,16,4,1,1,2,0,0,1,1", + "v4:128,256,2,32,256,16,8,1,1,2,0,0,1,1", + "v4:64,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:64,128,2,64,128,16,8,1,1,2,0,0,1,1", + "v4:256,128,8,256,32,16,8,1,1,2,0,0,1,1", + "v4:16,128,8,16,128,16,16,1,1,2,0,0,1,1", + "v4:128,32,8,128,32,16,16,1,2,2,0,0,1,1" }; // END_CONV_Wmma_f16_gfx1150_DEFS // BEGIN_CONV_Wmma_i8_gfx1150_DEFS const StringRef PopulateParamsWmma::initParametersI8ConvGfx1150[] = { - "v4:128,32,4,32,32,16,16,1,2,2,0,0,1,1", - "v4:16,128,4,16,128,16,4,1,1,2,0,0,1,1", - "v4:256,32,2,32,32,16,16,1,1,2,0,0,1,1", - "v4:64,128,4,32,64,16,8,1,1,2,0,0,1,1", - "v4:256,64,2,64,64,16,16,1,2,2,0,0,1,1", - "v4:64,64,4,16,64,16,16,1,1,2,0,0,1,1", - "v4:128,256,2,128,64,16,8,1,1,2,0,0,1,1", - "v4:128,64,8,128,32,16,4,1,2,2,0,0,1,1", - "v4:64,64,8,16,64,16,16,1,1,2,0,0,1,1", - "v4:128,64,4,64,64,16,4,1,1,2,0,0,1,1", - "v4:128,64,8,64,16,16,16,1,1,2,0,0,1,1", - "v4:128,128,2,128,32,16,8,1,2,2,0,0,1,1", - "v4:256,64,8,64,64,16,8,1,1,2,0,0,1,1", - "v4:256,32,4,64,32,16,16,1,1,2,0,0,1,1", - "v4:256,32,8,16,32,16,16,1,1,2,0,0,1,1", - "v4:32,256,4,32,128,16,4,1,2,2,0,0,1,1", + "v4:256,64,4,128,32,16,16,1,1,2,0,0,1,1", + "v4:256,64,8,64,32,16,16,1,1,2,0,0,1,1", + "v4:64,32,8,16,32,16,16,1,1,2,0,0,1,1", + "v4:128,128,4,128,32,16,4,1,1,2,0,0,1,1", + "v4:64,32,4,64,32,16,4,1,1,2,0,0,1,1", + "v4:64,32,8,16,32,16,16,1,2,2,0,0,1,1", + "v4:64,128,8,32,16,16,16,1,2,2,0,0,1,1", "v4:64,128,2,32,64,16,16,1,2,2,0,0,1,1", - "v4:128,16,4,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:256,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:256,32,8,128,32,16,16,1,1,2,0,0,1,1" + "v4:32,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:64,128,4,32,32,16,8,1,1,2,0,0,1,1", + "v4:256,128,4,128,64,16,8,1,1,2,0,0,1,1", + "v4:256,32,8,32,32,16,16,1,1,2,0,0,1,1", + "v4:64,128,4,64,128,16,4,1,1,2,0,0,1,1", + "v4:256,16,8,32,16,16,16,1,1,2,0,0,1,1", + "v4:16,128,8,16,128,16,16,1,2,2,0,0,1,1", + "v4:16,256,4,16,256,16,16,1,1,2,0,0,1,1" }; // END_CONV_Wmma_i8_gfx1150_DEFS @@ -2627,12 +2626,12 @@ static const StringRef initParametersI8GemmGfx1150[nInitParametersI8GemmGfx1150] // END_GEMM_Wmma_i8_gfx1150_DECS // BEGIN_CONV_Wmma_f16_gfx1150_DECS -static constexpr size_t nInitParametersF16ConvGfx1150 = 59; +static constexpr size_t nInitParametersF16ConvGfx1150 = 48; static const StringRef initParametersF16ConvGfx1150[nInitParametersF16ConvGfx1150]; // END_CONV_Wmma_f16_gfx1150_DECS // BEGIN_CONV_Wmma_i8_gfx1150_DECS -static constexpr size_t nInitParametersI8ConvGfx1150 = 21; +static constexpr size_t nInitParametersI8ConvGfx1150 = 16; static const StringRef initParametersI8ConvGfx1150[nInitParametersI8ConvGfx1150]; // END_CONV_Wmma_i8_gfx1150_DECS From f233e229f81c36a8183a9aa3c20798878e9f0508 Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Tue, 3 Feb 2026 19:58:43 +0000 Subject: [PATCH 09/10] Fix tests. --- .../Dialect/Rock/affix_tuning_params.mlir | 76 +++++++++---------- .../noTransA-noTransB/broadcasted-k-e2e.mlir | 2 +- .../noTransA-transB/broadcasted-k-e2e.mlir | 2 +- .../transA-noTransB/gemm-k-e2e.mlir | 2 +- .../transA-noTransB/sliced-k-e2e.mlir | 2 +- .../transA-noTransB/unitdim-m-e2e.mlir | 2 +- .../transA-transB/gemm-k-e2e.mlir | 2 +- .../transA-transB/sliced-k-e2e.mlir | 2 +- 8 files changed, 45 insertions(+), 45 deletions(-) diff --git a/mlir/test/Dialect/Rock/affix_tuning_params.mlir b/mlir/test/Dialect/Rock/affix_tuning_params.mlir index 7610a0d05710..eef5f7ad621d 100644 --- a/mlir/test/Dialect/Rock/affix_tuning_params.mlir +++ b/mlir/test/Dialect/Rock/affix_tuning_params.mlir @@ -10,7 +10,7 @@ // GRID-LABEL: rock_conv func.func @rock_conv(%filter : memref<1x128x8x3x3xf32>, %input : memref<128x1x8x32x32xf32>, %output : memref<128x1x128x30x30xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906"} { // CHECK: rock.conv - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm // GRID-SAME: gridSize = 1800 rock.conv(%filter, %input, %output) features = none { @@ -28,7 +28,7 @@ func.func @rock_conv(%filter : memref<1x128x8x3x3xf32>, %input : memref<128x1x8x // GRID-LABEL: rock_conv_schedulev2 func.func @rock_conv_schedulev2(%filter : memref<1x128x8x3x3xf32>, %input : memref<128x1x8x32x32xf32>, %output : memref<128x1x128x30x30xf32>) attributes {schedule_version = #rock.schedule_version<2>, arch = "amdgcn-amd-amdhsa:gfx906"} { // CHECK: rock.conv - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm // GRID-SAME: gridSize = 1800 rock.conv(%filter, %input, %output) features = none { @@ -46,7 +46,7 @@ func.func @rock_conv_schedulev2(%filter : memref<1x128x8x3x3xf32>, %input : memr // GRID-LABEL: func.func @rock_conv_f16 func.func @rock_conv_f16(%filter : memref<1x128x8x3x3xf16>, %input : memref<128x1x8x32x32xf16>, %output : memref<128x1x128x30x30xf16>) attributes {arch = "amdgcn-amd-amdhsa:gfx906"} { // CHECK: rock.conv - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm // GRID-SAME: gridSize = 1800 rock.conv(%filter, %input, %output) features = none { @@ -64,10 +64,10 @@ func.func @rock_conv_f16(%filter : memref<1x128x8x3x3xf16>, %input : memref<128x // GRID-LABEL: func.func @rock_conv_i8 func.func @rock_conv_i8(%filter : memref<1x128x8x3x3xi8>, %input : memref<128x1x8x32x32xi8>, %output : memref<128x1x128x30x30xi32>) attributes {arch = "amdgcn-amd-amdhsa:gfx908"} { // CHECK: rock.conv - // CHECK-SAME: derivedBlockSize = 64 - // CHECK-SAME: params = #rock.accel_gemm_params + // CHECK-SAME: derivedBlockSize = 256 + // CHECK-SAME: params = #rock.accel_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 3600 + // GRID-SAME: gridSize = 900 rock.conv(%filter, %input, %output) features = mfma|dot|atomic_add|atomic_add_f16 { filter_layout = ["g", "k", "c", "0", "1"], input_layout = ["ni", "gi", "ci", "0i", "1i"], @@ -84,9 +84,9 @@ func.func @rock_conv_i8(%filter : memref<1x128x8x3x3xi8>, %input : memref<128x1x func.func @rock_conv_bwd_data(%filter: memref<1x1024x1024x1x1xf32>, %input: memref<128x1x1024x14x14xf32>, %output: memref<128x1x1024x14x14xf32>) attributes {kernel = 0 : i32, arch = "amdgcn-amd-amdhsa:gfx908"} { // CHECK: rock.conv_bwd_data // CHECK-SAME: derivedBlockSize = 256 - // CHECK-SAME: params = #rock.accel_gemm_params + // CHECK-SAME: params = #rock.accel_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 25088 + // GRID-SAME: gridSize = 6272 rock.conv_bwd_data(%filter, %input, %output) features = mfma|dot|atomic_add|atomic_add_f16 { dilations = [1 : index, 1 : index], filter_layout = ["g", "k", "c", "0", "1"], @@ -105,9 +105,9 @@ func.func @rock_conv_bwd_data(%filter: memref<1x1024x1024x1x1xf32>, %input: memr func.func @rock_conv_bwd_data_f16(%filter: memref<1x1024x1024x1x1xf16>, %input: memref<128x1x1024x14x14xf16>, %output: memref<128x1x1024x14x14xf16>) attributes {kernel = 0 : i32, arch = "amdgcn-amd-amdhsa:gfx908"} { // CHECK: rock.conv_bwd_data // CHECK-SAME: derivedBlockSize = 256 - // CHECK-SAME: params = #rock.accel_gemm_params + // CHECK-SAME: params = #rock.accel_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 25088 + // GRID-SAME: gridSize = 12544 rock.conv_bwd_data(%filter, %input, %output) features = mfma|dot|atomic_add|atomic_add_f16 { dilations = [1 : index, 1 : index], filter_layout = ["g", "k", "c", "0", "1"], @@ -125,9 +125,9 @@ func.func @rock_conv_bwd_data_f16(%filter: memref<1x1024x1024x1x1xf16>, %input: // GRID-LABEL: func.func @rock_conv_bwd_data_padMN func.func @rock_conv_bwd_data_padMN(%filter : memref<1x64x3x1x1xf32>, %input : memref<11x1x3x15x15xf32>, %output : memref<11x1x64x15x15xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906"} { // CHECK: rock.conv_bwd_data - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 39 + // GRID-SAME: gridSize = 78 rock.conv_bwd_data(%filter, %input, %output) features = none { filter_layout = ["g", "k", "c", "0", "1"], input_layout = ["ni", "gi", "ci", "0i", "1i"], @@ -145,9 +145,9 @@ func.func @rock_conv_bwd_data_padMN(%filter : memref<1x64x3x1x1xf32>, %input : m // GRID-LABEL: @rock_conv_bwd_data_padMK func.func @rock_conv_bwd_data_padMK(%filter : memref<1x11x3x1x1xf32>, %input : memref<128x1x3x15x15xf32>, %output : memref<128x1x11x15x15xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906"} { // CHECK: rock.conv_bwd_data - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 225 + // GRID-SAME: gridSize = 450 rock.conv_bwd_data(%filter, %input, %output) features = none { filter_layout = ["g", "k", "c", "0", "1"], input_layout = ["ni", "gi", "ci", "0i", "1i"], @@ -165,9 +165,9 @@ func.func @rock_conv_bwd_data_padMK(%filter : memref<1x11x3x1x1xf32>, %input : m // GRID-LABEL: @rock_conv_bwd_weight func.func @rock_conv_bwd_weight(%filter : memref<1x128x8x3x3xf32>, %input : memref<128x1x8x32x32xf32>, %output : memref<128x1x128x30x30xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906", numCU = 64 : i32} { // CHECK: rock.conv_bwd_weight - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 6 + // GRID-SAME: gridSize = 12 rock.conv_bwd_weight(%filter, %input, %output) features = none { filter_layout = ["g", "k", "c", "0", "1"], input_layout = ["ni", "gi", "ci", "0i", "1i"], @@ -183,9 +183,9 @@ func.func @rock_conv_bwd_weight(%filter : memref<1x128x8x3x3xf32>, %input : memr // GRID-LABEL: @rock_conv_bwd_weight_f16 func.func @rock_conv_bwd_weight_f16(%filter : memref<1x128x8x3x3xf16>, %input : memref<128x1x8x32x32xf16>, %output : memref<128x1x128x30x30xf16>) attributes {arch = "amdgcn-amd-amdhsa:gfx906", numCU = 64 : i32} { // CHECK: rock.conv_bwd_weight - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 6 + // GRID-SAME: gridSize = 12 rock.conv_bwd_weight(%filter, %input, %output) features = none { filter_layout = ["g", "k", "c", "0", "1"], input_layout = ["ni", "gi", "ci", "0i", "1i"], @@ -201,7 +201,7 @@ func.func @rock_conv_bwd_weight_f16(%filter : memref<1x128x8x3x3xf16>, %input : // GRID-LABEL: func.func @rock_conv_bwd_weight_padALL func.func @rock_conv_bwd_weight_padALL(%filter : memref<1x20x8x3x3xf32>, %input : memref<7x1x8x32x32xf32>, %output : memref<7x1x20x30x30xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906", numCU = 64 : i32} { // CHECK: rock.conv_bwd_weight - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm // GRID-SAME: gridSize = 3 rock.conv_bwd_weight(%filter, %input, %output) features = none { @@ -219,7 +219,7 @@ func.func @rock_conv_bwd_weight_padALL(%filter : memref<1x20x8x3x3xf32>, %input // GRID-LABEL: @rock_conv_bwd_weight_padALL_f16 func.func @rock_conv_bwd_weight_padALL_f16(%filter : memref<1x20x8x3x3xf16>, %input : memref<7x1x8x32x32xf16>, %output : memref<7x1x20x30x30xf16>) attributes {arch = "amdgcn-amd-amdhsa:gfx906", numCU = 64 : i32} { // CHECK: rock.conv_bwd_weight - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm // GRID-SAME: gridSize = 3 rock.conv_bwd_weight(%filter, %input, %output) features = none { @@ -259,10 +259,10 @@ func.func @rock_conv_7x7_tuning(%arg0: memref<1x64x3x7x7xf32>, %arg1: memref<256 // GRID-LABEL: @rock_conv_7x7 func.func @rock_conv_7x7(%arg0: memref<1x64x3x7x7xf32>, %arg1: memref<256x1x3x230x230xf32>, %arg2: memref<256x1x64x112x112xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx908"} { // CHECK: rock.conv - // CHECK-SAME: derivedBlockSize = 64 - // CHECK-SAME: params = #rock.accel_gemm_params + // CHECK-SAME: derivedBlockSize = 256 + // CHECK-SAME: params = #rock.accel_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 100352 + // GRID-SAME: gridSize = 12544 rock.conv(%arg0, %arg1, %arg2) features = mfma|dot|atomic_add|atomic_add_f16 { dilations = [1 : index, 1 : index], filter_layout = ["g", "k", "c", "0", "1"], @@ -279,7 +279,7 @@ func.func @rock_conv_7x7(%arg0: memref<1x64x3x7x7xf32>, %arg1: memref<256x1x3x23 func.func @rock_conv_bwd_weight_7x7(%arg0: memref<1x64x3x7x7xf32>, %arg1: memref<256x1x3x230x230xf32>, %arg2: memref<256x1x64x112x112xf32>) attributes {kernel = 0 : i32, arch = "amdgcn-amd-amdhsa:gfx908", numCU = 120 : i32} { // CHECK: rock.conv_bwd_weight // CHECK-SAME: derivedBlockSize = 256 - // CHECK-SAME: params = #rock.accel_gemm_params + // CHECK-SAME: params = #rock.accel_gemm_params // GRID: rock.gridwise_gemm // GRID-SAME: gridSize = 10 rock.conv_bwd_weight(%arg0, %arg1, %arg2) features = mfma|dot|atomic_add|atomic_add_f16 { @@ -319,10 +319,10 @@ func.func @rock_conv_bwd_data_7x7_tuning(%arg0: memref<1x64x3x7x7xf32>, %arg1: m // GRID-LABEL: @rock_conv_bwd_data_7x7 func.func @rock_conv_bwd_data_7x7(%arg0: memref<1x64x3x7x7xf32>, %arg1: memref<256x1x3x230x230xf32>, %arg2: memref<256x1x64x112x112xf32>) attributes {kernel = 1 : i32, arch = "amdgcn-amd-amdhsa:gfx908"} { // CHECK: rock.conv_bwd_data - // CHECK-SAME: derivedBlockSize = 64 - // CHECK-SAME: params = #rock.accel_gemm_params + // CHECK-SAME: derivedBlockSize = 256 + // CHECK-SAME: params = #rock.accel_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 211600 + // GRID-SAME: gridSize = 52900 rock.conv_bwd_data(%arg0, %arg1, %arg2) features = mfma|dot|atomic_add|atomic_add_f16 { dilations = [1 : index, 1 : index], filter_layout = ["g", "k", "c", "0", "1"], @@ -340,9 +340,9 @@ func.func @rock_conv_bwd_data_7x7(%arg0: memref<1x64x3x7x7xf32>, %arg1: memref<2 // GRID-LABEL: @rock_gemm_from_conv func.func @rock_gemm_from_conv(%a : memref<1x72x128xf32>, %b : memref<1x72x115200xf32>, %c : memref<1x128x115200xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906", numCU = 64 : i32} { // CHECK: rock.gemm - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 900 + // GRID-SAME: gridSize = 1800 rock.gemm %c = tr %a * %b features = none storeMethod = set : memref<1x128x115200xf32> = memref<1x72x128xf32> * memref<1x72x115200xf32> return @@ -352,10 +352,10 @@ func.func @rock_gemm_from_conv(%a : memref<1x72x128xf32>, %b : memref<1x72x11520 // GRID-LABEL: func.func @rock_gemm_from_i8_conv func.func @rock_gemm_from_i8_conv(%a : memref<1x72x128xi8>, %b : memref<1x72x115200xi8>, %c : memref<1x128x115200xi32>) attributes {arch = "amdgcn-amd-amdhsa:gfx908", numCU = 120 : i32} { // CHECK: rock.gemm - // CHECK-SAME: derivedBlockSize = 256 - // CHECK-SAME: params = #rock.accel_gemm_params + // CHECK-SAME: derivedBlockSize = 512 + // CHECK-SAME: params = #rock.accel_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 7200 + // GRID-SAME: gridSize = 1800 rock.gemm %c = tr %a * %b features = mfma|dot|atomic_add|atomic_add_f16 storeMethod = set : memref<1x128x115200xi32> = memref<1x72x128xi8> * memref<1x72x115200xi8> return @@ -365,10 +365,10 @@ func.func @rock_gemm_from_i8_conv(%a : memref<1x72x128xi8>, %b : memref<1x72x115 // GRID-LABEL: func.func @rock_gemm_from_i8_conv_schedule_v2 func.func @rock_gemm_from_i8_conv_schedule_v2(%a : memref<1x72x128xi8>, %b : memref<1x72x115200xi8>, %c : memref<1x128x115200xi32>) attributes {schedule_version = #rock.schedule_version<2>, arch = "amdgcn-amd-amdhsa:gfx908", numCU = 120 : i32} { // CHECK: rock.gemm - // CHECK-SAME: derivedBlockSize = 256 - // CHECK-SAME: params = #rock.accel_gemm_params + // CHECK-SAME: derivedBlockSize = 512 + // CHECK-SAME: params = #rock.accel_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 7200 + // GRID-SAME: gridSize = 1800 rock.gemm %c = tr %a * %b features = mfma|dot|atomic_add|atomic_add_f16 storeMethod = set : memref<1x128x115200xi32> = memref<1x72x128xi8> * memref<1x72x115200xi8> return @@ -381,10 +381,10 @@ func.func @rock_gemm_from_i8_conv_schedule_v2(%a : memref<1x72x128xi8>, %b : mem // GRID-LABEL: func.func @rock_gemm_from_i8_conv_gfx942 func.func @rock_gemm_from_i8_conv_gfx942(%a : memref<1x72x128xi8>, %b : memref<1x72x115200xi8>, %c : memref<1x128x115200xi32>) attributes {arch = "amdgcn-amd-amdhsa:gfx942", numCU = 120 : i32} { // CHECK: rock.gemm - // CHECK-SAME: derivedBlockSize = 256 - // CHECK-SAME: params = #rock.accel_gemm_params + // CHECK-SAME: derivedBlockSize = 512 + // CHECK-SAME: params = #rock.accel_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 14400 + // GRID-SAME: gridSize = 1800 rock.gemm %c = tr %a * %b features = mfma|dot|atomic_add|atomic_add_f16 storeMethod = set : memref<1x128x115200xi32> = memref<1x72x128xi8> * memref<1x72x115200xi8> return diff --git a/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-noTransB/broadcasted-k-e2e.mlir b/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-noTransB/broadcasted-k-e2e.mlir index 92be2f1994b5..8ba76962c546 100644 --- a/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-noTransB/broadcasted-k-e2e.mlir +++ b/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-noTransB/broadcasted-k-e2e.mlir @@ -7,7 +7,7 @@ // EMITKEY: -t f16 -out_datatype f16 -transA false -transB false -g 2 -m 4096 -n 640 -k 320 // VECTORIZATION: aVectorDim: GemmDimension::MorN -// VECTORIZATION-NEXT: aVectorLen: 4 +// VECTORIZATION-NEXT: aVectorLen: 2 // VECTORIZATION: bVectorDim: GemmDimension::MorN // VECTORIZATION-NEXT: bVectorLen: 2 diff --git a/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-transB/broadcasted-k-e2e.mlir b/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-transB/broadcasted-k-e2e.mlir index 853c3779e9af..778dbac1e10a 100644 --- a/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-transB/broadcasted-k-e2e.mlir +++ b/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-transB/broadcasted-k-e2e.mlir @@ -7,7 +7,7 @@ // EMITKEY: -t f16 -out_datatype f16 -transA false -transB true -g 2 -m 4096 -n 640 -k 320 // VECTORIZATION: aVectorDim: GemmDimension::MorN -// VECTORIZATION-NEXT: aVectorLen: 4 +// VECTORIZATION-NEXT: aVectorLen: 2 // VECTORIZATION: bVectorDim: GemmDimension::K // VECTORIZATION-NEXT: bVectorLen: 8 diff --git a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/gemm-k-e2e.mlir b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/gemm-k-e2e.mlir index 1c1f9e26dc68..6164549e560a 100644 --- a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/gemm-k-e2e.mlir +++ b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/gemm-k-e2e.mlir @@ -6,7 +6,7 @@ // EMITKEY: -t f16 -out_datatype f16 -transA true -transB false -g 2 -m 4096 -n 640 -k 320 // VECTORIZATION: aVectorDim: GemmDimension::MorN -// VECTORIZATION-NEXT: aVectorLen: 4 +// VECTORIZATION-NEXT: aVectorLen: 2 // VECTORIZATION: bVectorDim: GemmDimension::MorN // VECTORIZATION-NEXT: bVectorLen: 2 diff --git a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/sliced-k-e2e.mlir b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/sliced-k-e2e.mlir index 9a89a23dfe17..b35867a25f25 100644 --- a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/sliced-k-e2e.mlir +++ b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/sliced-k-e2e.mlir @@ -7,7 +7,7 @@ // EMITKEY: -t f16 -out_datatype f16 -transA true -transB false -g 2 -m 4096 -n 640 -k 320 // VECTORIZATION: aVectorDim: GemmDimension::MorN -// VECTORIZATION-NEXT: aVectorLen: 4 +// VECTORIZATION-NEXT: aVectorLen: 2 // VECTORIZATION: bVectorDim: GemmDimension::MorN // VECTORIZATION-NEXT: bVectorLen: 2 diff --git a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/unitdim-m-e2e.mlir b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/unitdim-m-e2e.mlir index 2eb79a3eab78..a22cf6f17803 100644 --- a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/unitdim-m-e2e.mlir +++ b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/unitdim-m-e2e.mlir @@ -9,7 +9,7 @@ // VECTORIZATION: aVectorDim: GemmDimension::K // VECTORIZATION-NEXT: aVectorLen: 8 // VECTORIZATION: bVectorDim: GemmDimension::MorN -// VECTORIZATION-NEXT: bVectorLen: 4 +// VECTORIZATION-NEXT: bVectorLen: 8 module { func.func @test(%arg0: !migraphx.shaped<2x1x320xf16, 320x1x1>, %arg1: !migraphx.shaped<2x640x320xf16, 204800x1x640>, %arg2: !migraphx.shaped<2x64x10xf16, 0x10x1>) -> !migraphx.shaped<2x64x10xf16, 640x10x1> { diff --git a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/gemm-k-e2e.mlir b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/gemm-k-e2e.mlir index 5753af9c6330..62dcc2df3117 100644 --- a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/gemm-k-e2e.mlir +++ b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/gemm-k-e2e.mlir @@ -7,7 +7,7 @@ // EMITKEY: -t f16 -out_datatype f16 -transA true -transB true -g 2 -m 4096 -n 640 -k 320 // VECTORIZATION: aVectorDim: GemmDimension::MorN -// VECTORIZATION-NEXT: aVectorLen: 4 +// VECTORIZATION-NEXT: aVectorLen: 2 // VECTORIZATION: bVectorDim: GemmDimension::K // VECTORIZATION-NEXT: bVectorLen: 8 diff --git a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/sliced-k-e2e.mlir b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/sliced-k-e2e.mlir index b19e6a1ec7df..3f95dfc8caae 100644 --- a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/sliced-k-e2e.mlir +++ b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/sliced-k-e2e.mlir @@ -7,7 +7,7 @@ // EMITKEY: -t f16 -out_datatype f16 -transA true -transB true -g 2 -m 4096 -n 640 -k 320 // VECTORIZATION: aVectorDim: GemmDimension::MorN -// VECTORIZATION-NEXT: aVectorLen: 4 +// VECTORIZATION-NEXT: aVectorLen: 2 // VECTORIZATION: bVectorDim: GemmDimension::K // VECTORIZATION-NEXT: bVectorLen: 8 From 44c2246e11b9ba52e0e67104fb80a810f09ff6d8 Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Wed, 18 Feb 2026 01:16:47 +0000 Subject: [PATCH 10/10] Fix test. --- mlir/test/CAPI/mixr_full.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/test/CAPI/mixr_full.c b/mlir/test/CAPI/mixr_full.c index 847a990026ee..30117edc27ac 100644 --- a/mlir/test/CAPI/mixr_full.c +++ b/mlir/test/CAPI/mixr_full.c @@ -194,7 +194,7 @@ static bool constructAndTraverseIr(MlirContext ctx) { mlirRockTuningSpaceCreate(module, RocmlirTuningParamSetKindFull); printf("Got tuning space,\n"); unsigned fNum = mlirRockTuningGetNumParams(tuningSpace); - // CHECK: full set = 932 + // CHECK: full set = 937 printf("full set = %u\n", fNum); MlirRockTuningParam tuningParam = mlirRockTuningParamCreate(); MlirRockTuningTable tuningTable = mlirRockTuningTableCreate();