diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc index 1bb32e598494..21b6d27ddd0e 100644 --- a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc +++ b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc @@ -23,24 +23,6 @@ const StringRef PopulateParams::initParametersF32GemmGfx1000[] = { }; // END_GEMM_NonAccel_f32_gfx1000_DEFS -// BEGIN_GEMM_NonAccel_f32_gfx1100_DEFS -const StringRef PopulateParams::initParametersF32GemmGfx1100[] = { - "v3:64,64,32,16,2,2,1,1,2", - "v3:64,64,64,4,2,2,1,1,2", - "v3:128,128,128,16,2,2,1,1,2", - "v3:64,32,128,16,2,2,1,1,2", - "v3:128,128,32,16,4,2,1,1,2", - "v3:64,32,64,8,4,4,1,1,2", - "v3:64,64,128,8,4,2,1,1,2", - "v3:64,32,32,4,2,2,1,1,2", - "v3:128,64,64,4,2,2,1,1,2", - "v3:64,32,32,16,2,4,1,1,2", - "v3:128,32,64,16,2,4,1,1,2", - "v3:256,64,32,16,4,2,1,1,2", - "v3:256,64,128,8,2,2,1,1,2" -}; -// END_GEMM_NonAccel_f32_gfx1100_DEFS - // BEGIN_CONV_NonAccel_f32_gfx1000_DEFS const StringRef PopulateParams::initParametersF32ConvGfx1000[] = { "v3:128,128,128,4,2,4,1,1,2", @@ -70,82 +52,51 @@ const StringRef PopulateParams::initParametersF32ConvGfx1000[] = { }; // END_CONV_NonAccel_f32_gfx1000_DEFS -// BEGIN_CONV_NonAccel_f32_gfx1100_DEFS -const StringRef PopulateParams::initParametersF32ConvGfx1100[] = { - "v3:64,32,32,8,4,4,1,1,2", - "v3:64,32,32,8,2,2,1,1,2", - "v3:128,32,32,4,2,2,1,1,2", - "v3:128,32,64,4,2,2,1,1,2", - "v3:64,32,32,16,2,4,1,1,2", - "v3:64,32,32,4,2,4,1,1,2", - "v3:64,64,32,16,2,2,1,1,2", - "v3:64,64,128,4,2,2,1,1,2", - "v3:128,32,32,16,2,2,1,1,2", - "v3:64,32,64,8,4,2,1,1,2", - "v3:128,32,32,16,2,4,1,1,2", - "v3:64,64,64,16,2,2,1,1,2", - "v3:64,64,64,4,4,2,1,1,2", - "v3:128,32,32,8,2,4,1,1,2", - "v3:128,128,128,8,2,2,1,1,2", - "v3:256,32,32,16,2,2,1,1,2", - "v3:64,128,64,8,2,4,1,1,2", - "v3:64,64,64,16,2,4,1,1,2", - "v3:256,32,32,8,2,2,1,1,2", - "v3:64,64,128,16,4,4,1,1,2", - "v3:128,64,128,16,2,2,1,1,2", - "v3:128,128,32,16,2,4,1,1,2", - "v3:64,32,32,4,2,2,1,1,2", - "v3:64,64,64,4,2,2,1,1,2", - "v3:128,32,32,4,2,4,1,1,2" -}; -// END_CONV_NonAccel_f32_gfx1100_DEFS - // BEGIN_GEMM_NonAccel_f32_gfx1201_DEFS const StringRef PopulateParams::initParametersF32GemmGfx1201[] = { - "v3:128,128,128,16,2,2,1,1,2", + "v3:64,64,128,4,2,4,1,1,2", + "v3:128,128,128,8,2,4,1,1,2", "v3:64,128,64,16,2,2,1,1,2", - "v3:64,64,32,16,2,2,1,1,2", - "v3:128,128,128,4,4,2,1,1,2", - "v3:64,64,64,16,4,4,1,1,2", - "v3:64,64,128,16,4,4,1,1,2", - "v3:64,64,64,8,2,4,1,1,2", - "v3:128,128,64,8,2,4,1,1,2", "v3:128,32,32,16,2,2,1,1,2", - "v3:64,32,32,8,2,2,1,1,2" + "v3:64,64,128,16,2,2,1,1,2", + "v3:64,128,64,16,4,4,1,1,2", + "v3:64,128,64,4,2,4,1,1,2", + "v3:64,32,64,8,2,4,1,1,2", + "v3:64,32,128,8,2,2,1,1,2", + "v3:64,64,64,8,2,4,1,1,2", + "v3:128,64,64,8,2,2,1,1,2", + "v3:256,64,64,8,4,2,1,1,2" }; // END_GEMM_NonAccel_f32_gfx1201_DEFS // BEGIN_CONV_NonAccel_f32_gfx1201_DEFS const StringRef PopulateParams::initParametersF32ConvGfx1201[] = { - "v3:64,64,128,4,2,4,1,1,2", - "v3:64,64,128,8,2,4,1,1,2", + "v3:64,64,128,4,2,2,1,1,2", + "v3:128,64,64,8,2,2,1,1,2", + "v3:64,64,128,8,2,2,1,1,2", + "v3:64,64,64,16,2,2,1,1,2", + "v3:64,64,64,16,2,4,1,1,2", "v3:64,64,64,4,4,4,1,1,2", - "v3:64,128,64,16,2,2,1,1,2", - "v3:64,64,128,8,4,4,1,1,2", + "v3:64,32,128,8,2,2,1,1,2", "v3:64,64,128,16,2,2,1,1,2", - "v3:64,64,64,4,2,2,1,1,2", - "v3:64,64,128,16,2,4,1,1,2", - "v3:64,64,64,16,2,2,1,1,2", - "v3:64,32,128,4,2,2,1,1,2", - "v3:128,128,128,16,2,2,1,1,2", - "v3:128,128,128,16,4,2,1,1,2", - "v3:128,128,128,8,2,2,1,1,2", - "v3:64,64,32,8,2,2,1,1,2", - "v3:64,64,32,16,2,4,1,1,2", - "v3:128,128,64,8,4,4,1,1,2", - "v3:64,32,64,8,2,4,1,1,2", - "v3:64,64,64,16,4,4,1,1,2", - "v3:128,64,64,8,2,4,1,1,2", + "v3:64,32,32,8,2,2,1,1,2", + "v3:64,32,64,8,4,4,1,1,2", + "v3:128,128,128,16,2,4,1,1,2", "v3:128,128,128,4,2,2,1,1,2", - "v3:128,128,32,16,2,4,1,1,2", + "v3:64,64,128,16,2,4,1,1,2", "v3:128,32,32,16,2,4,1,1,2", - "v3:64,128,64,8,4,2,1,1,2", - "v3:64,32,32,8,2,2,1,1,2", + "v3:64,32,64,4,4,2,1,1,2", "v3:256,128,128,8,4,2,1,1,2", - "v3:128,64,32,16,2,2,1,1,2", - "v3:128,128,64,16,2,4,1,1,2", - "v3:256,128,128,16,4,4,1,1,2", - "v3:256,32,32,16,2,2,1,1,2" + "v3:128,64,128,8,4,4,1,1,2", + "v3:64,32,128,4,4,2,1,1,2", + "v3:128,128,128,8,2,4,1,1,2", + "v3:64,128,64,16,4,4,1,1,2", + "v3:64,32,32,16,2,2,1,1,2", + "v3:128,128,64,8,2,4,1,1,2", + "v3:64,64,32,8,2,2,1,1,2", + "v3:256,128,128,16,2,4,1,1,2", + "v3:128,128,64,8,4,4,1,1,2", + "v3:128,128,32,16,2,2,1,1,2" }; // END_CONV_NonAccel_f32_gfx1201_DEFS @@ -209,69 +160,139 @@ const StringRef PopulateParams::initParametersF32ConvGfx1151[] = { // BEGIN_GEMM_NonAccel_f32_gfx1150_DEFS const StringRef PopulateParams::initParametersF32GemmGfx1150[] = { + "v3:64,64,128,16,2,4,1,1,2", + "v3:64,128,32,16,2,2,1,1,2", "v3:64,64,128,4,2,2,1,1,2", - "v3:128,128,128,16,4,2,1,1,2", - "v3:64,64,128,16,2,2,1,1,2", - "v3:64,128,64,16,2,2,1,1,2", - "v3:64,64,64,8,2,2,1,1,2", - "v3:64,128,32,16,2,4,1,1,2", - "v3:256,128,128,8,4,4,1,1,2", "v3:128,32,32,16,2,4,1,1,2", - "v3:64,32,64,8,4,4,1,1,2", - "v3:128,64,128,4,2,2,1,1,2", - "v3:256,64,128,16,4,4,1,1,2", - "v3:128,64,64,4,4,2,1,1,2", - "v3:64,32,32,16,4,2,1,1,2" + "v3:64,64,64,16,2,2,1,1,2", + "v3:128,128,128,8,2,4,1,1,2", + "v3:64,64,128,4,2,4,1,1,2", + "v3:64,64,128,4,4,4,1,1,2", + "v3:64,64,64,4,2,2,1,1,2", + "v3:64,128,64,8,2,2,1,1,2", + "v3:256,32,64,16,2,4,1,1,2", + "v3:64,128,64,4,2,2,1,1,2", + "v3:64,64,32,16,2,2,1,1,2", + "v3:256,128,128,8,4,4,1,1,2" }; // END_GEMM_NonAccel_f32_gfx1150_DEFS // BEGIN_CONV_NonAccel_f32_gfx1150_DEFS const StringRef PopulateParams::initParametersF32ConvGfx1150[] = { - "v3:64,64,128,16,2,2,1,1,2", - "v3:64,64,128,8,2,2,1,1,2", "v3:64,64,128,4,2,2,1,1,2", - "v3:64,64,128,4,2,4,1,1,2", - "v3:64,64,128,16,4,2,1,1,2", - "v3:64,64,128,8,2,4,1,1,2", - "v3:64,128,64,16,2,2,1,1,2", + "v3:64,64,128,8,2,2,1,1,2", "v3:64,64,128,16,4,4,1,1,2", - "v3:64,64,128,4,4,2,1,1,2", - "v3:128,128,128,16,2,4,1,1,2", - "v3:128,128,128,16,4,2,1,1,2", - "v3:64,64,64,4,2,4,1,1,2", - "v3:64,32,128,4,2,2,1,1,2", - "v3:64,64,64,8,4,2,1,1,2", - "v3:64,64,64,16,2,2,1,1,2", + "v3:64,64,128,16,2,2,1,1,2", "v3:128,128,128,4,2,2,1,1,2", - "v3:64,128,64,16,4,2,1,1,2", - "v3:128,128,128,4,2,4,1,1,2", - "v3:64,32,64,4,2,2,1,1,2", - "v3:64,64,32,16,2,2,1,1,2", + "v3:64,64,128,8,2,4,1,1,2", + "v3:64,128,64,16,2,4,1,1,2", + "v3:64,64,128,8,4,2,1,1,2", + "v3:128,64,128,4,2,2,1,1,2", + "v3:64,128,64,16,4,4,1,1,2", + "v3:64,64,64,16,2,2,1,1,2", + "v3:64,128,64,8,2,2,1,1,2", + "v3:128,64,128,8,2,2,1,1,2", + "v3:64,64,64,8,2,4,1,1,2", + "v3:128,128,128,4,4,2,1,1,2", + "v3:64,64,128,4,4,4,1,1,2", + "v3:128,64,128,8,2,4,1,1,2", + "v3:128,64,128,4,2,4,1,1,2", + "v3:64,32,128,8,2,2,1,1,2", + "v3:64,64,64,4,2,2,1,1,2", + "v3:64,64,64,8,4,2,1,1,2", + "v3:64,64,64,16,4,2,1,1,2", + "v3:128,128,128,16,2,2,1,1,2", + "v3:128,64,128,4,4,2,1,1,2", + "v3:64,64,64,8,4,4,1,1,2", + "v3:64,32,128,4,2,2,1,1,2", + "v3:128,64,128,4,4,4,1,1,2", + "v3:128,128,128,16,4,2,1,1,2", + "v3:128,64,128,8,4,4,1,1,2", + "v3:256,128,128,8,2,2,1,1,2", + "v3:256,64,128,8,2,2,1,1,2", + "v3:64,32,128,16,2,2,1,1,2", + "v3:64,128,64,4,4,4,1,1,2", + "v3:256,64,128,8,4,2,1,1,2", + "v3:256,64,64,16,2,4,1,1,2", + "v3:128,64,64,16,2,2,1,1,2", "v3:64,32,64,16,2,2,1,1,2", + "v3:128,32,32,16,2,2,1,1,2", + "v3:256,32,64,16,2,2,1,1,2", + "v3:128,128,128,8,4,4,1,1,2", + "v3:64,128,32,16,2,4,1,1,2", + "v3:128,128,64,16,2,2,1,1,2", + "v3:256,64,128,16,4,2,1,1,2", + "v3:128,32,64,16,2,4,1,1,2", + "v3:64,128,32,16,4,4,1,1,2", + "v3:128,64,128,16,4,4,1,1,2", "v3:128,128,64,8,4,2,1,1,2", - "v3:64,128,64,4,4,4,1,1,2", - "v3:256,128,128,8,2,2,1,1,2", - "v3:64,128,32,16,4,2,1,1,2", - "v3:64,64,32,8,2,4,1,1,2", - "v3:128,32,32,16,2,4,1,1,2", - "v3:128,64,64,16,2,4,1,1,2", - "v3:128,64,32,16,2,2,1,1,2", - "v3:64,32,32,8,2,2,1,1,2", - "v3:64,128,32,8,4,2,1,1,2", - "v3:256,128,128,4,2,4,1,1,2", + "v3:128,32,128,16,2,2,1,1,2", + "v3:128,64,32,16,4,2,1,1,2", + "v3:256,32,32,16,2,2,1,1,2", + "v3:128,128,32,16,4,2,1,1,2", + "v3:64,64,32,4,4,2,1,1,2", "v3:128,128,32,16,2,4,1,1,2", - "v3:256,128,128,16,4,2,1,1,2", - "v3:64,128,32,8,2,4,1,1,2", - "v3:128,64,64,16,4,2,1,1,2", - "v3:256,64,64,16,2,4,1,1,2", - "v3:128,64,64,8,4,4,1,1,2", - "v3:256,32,128,8,2,2,1,1,2", - "v3:256,32,64,8,2,2,1,1,2", - "v3:256,64,32,16,2,2,1,1,2", - "v3:128,64,32,4,2,2,1,1,2" + "v3:128,128,64,4,4,4,1,1,2", + "v3:64,32,64,4,2,2,1,1,2", + "v3:64,32,64,4,4,4,1,1,2", + "v3:64,32,32,8,4,4,1,1,2" }; // END_CONV_NonAccel_f32_gfx1150_DEFS +// BEGIN_GEMM_NonAccel_f32_gfx1101_DEFS +const StringRef PopulateParams::initParametersF32GemmGfx1101[] = { + "v3:128,128,128,8,2,4,1,1,2", + "v3:128,32,32,16,2,2,1,1,2", + "v3:64,128,64,16,2,4,1,1,2", + "v3:64,64,128,4,2,2,1,1,2", + "v3:64,64,128,8,2,4,1,1,2", + "v3:64,64,64,16,4,2,1,1,2", + "v3:64,64,128,16,4,2,1,1,2", + "v3:64,128,64,4,2,2,1,1,2", + "v3:64,32,32,8,2,4,1,1,2", + "v3:128,128,128,8,2,2,1,1,2", + "v3:64,32,64,8,4,4,1,1,2", + "v3:64,32,128,16,4,2,1,1,2", + "v3:128,128,64,8,2,2,1,1,2" +}; +// END_GEMM_NonAccel_f32_gfx1101_DEFS + +// BEGIN_CONV_NonAccel_f32_gfx1101_DEFS +const StringRef PopulateParams::initParametersF32ConvGfx1101[] = { + "v3:64,64,128,4,2,2,1,1,2", + "v3:64,64,64,4,2,2,1,1,2", + "v3:64,64,128,4,2,4,1,1,2", + "v3:64,64,128,8,2,4,1,1,2", + "v3:128,64,64,8,2,4,1,1,2", + "v3:64,64,64,4,4,2,1,1,2", + "v3:64,32,32,8,2,2,1,1,2", + "v3:64,64,128,8,2,2,1,1,2", + "v3:64,64,128,8,4,4,1,1,2", + "v3:128,64,64,8,2,2,1,1,2", + "v3:64,32,64,8,4,2,1,1,2", + "v3:128,128,128,4,2,2,1,1,2", + "v3:64,32,128,8,2,2,1,1,2", + "v3:64,32,128,4,2,2,1,1,2", + "v3:128,32,32,16,2,2,1,1,2", + "v3:64,128,64,8,2,4,1,1,2", + "v3:64,64,64,16,2,4,1,1,2", + "v3:64,128,64,4,4,2,1,1,2", + "v3:64,64,128,16,2,4,1,1,2", + "v3:128,128,128,16,4,4,1,1,2", + "v3:256,128,128,8,4,4,1,1,2", + "v3:64,64,64,8,4,4,1,1,2", + "v3:256,32,32,16,2,2,1,1,2", + "v3:64,64,32,8,2,4,1,1,2", + "v3:64,32,32,16,2,2,1,1,2", + "v3:128,128,128,8,4,2,1,1,2", + "v3:128,128,64,8,2,2,1,1,2", + "v3:128,64,32,16,2,4,1,1,2", + "v3:256,64,128,16,4,2,1,1,2", + "v3:128,128,64,16,2,4,1,1,2", + "v3:256,128,64,4,2,2,1,1,2" +}; +// END_CONV_NonAccel_f32_gfx1101_DEFS + // BEGIN_GEMM_NonAccel_f32_gfx1152_DEFS const StringRef PopulateParams::initParametersF32GemmGfx1152[] = { "v3:128,128,128,8,2,4,1,1,2", @@ -457,28 +478,18 @@ static constexpr size_t nInitParametersF32GemmGfx1000 = 14; static const StringRef initParametersF32GemmGfx1000[nInitParametersF32GemmGfx1000]; // END_GEMM_NonAccel_f32_gfx1000_DECS -// BEGIN_GEMM_NonAccel_f32_gfx1100_DECS -static constexpr size_t nInitParametersF32GemmGfx1100 = 13; -static const StringRef initParametersF32GemmGfx1100[nInitParametersF32GemmGfx1100]; -// END_GEMM_NonAccel_f32_gfx1100_DECS - // BEGIN_CONV_NonAccel_f32_gfx1000_DECS static constexpr size_t nInitParametersF32ConvGfx1000 = 24; static const StringRef initParametersF32ConvGfx1000[nInitParametersF32ConvGfx1000]; // END_CONV_NonAccel_f32_gfx1000_DECS -// BEGIN_CONV_NonAccel_f32_gfx1100_DECS -static constexpr size_t nInitParametersF32ConvGfx1100 = 25; -static const StringRef initParametersF32ConvGfx1100[nInitParametersF32ConvGfx1100]; -// END_CONV_NonAccel_f32_gfx1100_DECS - // BEGIN_GEMM_NonAccel_f32_gfx1201_DECS -static constexpr size_t nInitParametersF32GemmGfx1201 = 10; +static constexpr size_t nInitParametersF32GemmGfx1201 = 12; static const StringRef initParametersF32GemmGfx1201[nInitParametersF32GemmGfx1201]; // END_GEMM_NonAccel_f32_gfx1201_DECS // BEGIN_CONV_NonAccel_f32_gfx1201_DECS -static constexpr size_t nInitParametersF32ConvGfx1201 = 29; +static constexpr size_t nInitParametersF32ConvGfx1201 = 26; static const StringRef initParametersF32ConvGfx1201[nInitParametersF32ConvGfx1201]; // END_CONV_NonAccel_f32_gfx1201_DECS @@ -493,15 +504,25 @@ static const StringRef initParametersF32ConvGfx1151[nInitParametersF32ConvGfx115 // END_CONV_NonAccel_f32_gfx1151_DECS // BEGIN_GEMM_NonAccel_f32_gfx1150_DECS -static constexpr size_t nInitParametersF32GemmGfx1150 = 13; +static constexpr size_t nInitParametersF32GemmGfx1150 = 14; static const StringRef initParametersF32GemmGfx1150[nInitParametersF32GemmGfx1150]; // END_GEMM_NonAccel_f32_gfx1150_DECS // BEGIN_CONV_NonAccel_f32_gfx1150_DECS -static constexpr size_t nInitParametersF32ConvGfx1150 = 42; +static constexpr size_t nInitParametersF32ConvGfx1150 = 57; static const StringRef initParametersF32ConvGfx1150[nInitParametersF32ConvGfx1150]; // END_CONV_NonAccel_f32_gfx1150_DECS +// BEGIN_GEMM_NonAccel_f32_gfx1101_DECS +static constexpr size_t nInitParametersF32GemmGfx1101 = 13; +static const StringRef initParametersF32GemmGfx1101[nInitParametersF32GemmGfx1101]; +// END_GEMM_NonAccel_f32_gfx1101_DECS + +// BEGIN_CONV_NonAccel_f32_gfx1101_DECS +static constexpr size_t nInitParametersF32ConvGfx1101 = 31; +static const StringRef initParametersF32ConvGfx1101[nInitParametersF32ConvGfx1101]; +// END_CONV_NonAccel_f32_gfx1101_DECS + // BEGIN_GEMM_NonAccel_f32_gfx1152_DECS static constexpr size_t nInitParametersF32GemmGfx1152 = 17; static const StringRef initParametersF32GemmGfx1152[nInitParametersF32GemmGfx1152]; @@ -528,347 +549,739 @@ static const StringRef initParametersF32GemmGfx1103[nInitParametersF32GemmGfx110 // BEGIN_GEMM_XDL_f32_gfx908_DEFS const StringRef PopulateParamsXDL::initParametersF32GemmGfx908[] = { - "v4:64,64,8,32,32,32,4,1,2,2,0,0,1,1", - "v4:64,64,4,32,32,32,4,1,2,2,0,0,1,1", - "v4:128,64,4,128,16,16,4,1,1,2,0,0,1,1", - "v4:32,32,4,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,32,8,32,16,16,4,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,2,2,0,0,1,1" + "v4:16,32,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:32,64,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,16,64,16,4,1,1,2,0,0,1,1", + "v4:64,128,4,32,32,32,4,1,2,2,0,0,1,1", + "v4:64,64,4,32,32,32,8,1,2,2,0,0,1,1", + "v4:128,64,4,32,32,32,4,1,2,2,0,0,1,1", + "v4:32,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:256,128,4,64,32,32,4,1,2,2,0,0,1,1", + "v4:64,128,8,32,32,32,1,1,2,2,0,0,1,1", + "v4:32,64,8,16,64,16,4,1,2,2,0,0,1,1", + "v4:64,64,4,32,16,16,8,1,2,2,0,0,1,1", + "v4:16,96,8,16,48,16,8,1,2,2,0,0,1,1", + "v4:128,96,8,16,48,16,4,1,2,2,0,0,1,1", + "v4:96,128,4,96,32,32,4,1,2,1,1,64,1,1", + "v4:96,64,8,96,16,16,4,1,1,0,1,32,1,1" }; // END_GEMM_XDL_f32_gfx908_DEFS // BEGIN_GEMM_XDL_f32_gfx90a_DEFS const StringRef PopulateParamsXDL::initParametersF32GemmGfx90a[] = { - "v4:32,64,8,16,32,16,4,1,1,2,0,0,1,1", - "v4:32,32,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:32,64,8,32,16,16,4,1,1,2,0,0,1,1", + "v4:16,32,8,16,16,16,8,1,2,2,0,0,1,1", "v4:64,64,4,32,32,32,8,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,4,1,2,2,0,0,1,1", - "v4:64,32,4,16,32,16,8,1,2,2,0,0,1,1", - "v4:128,64,4,128,16,16,4,1,1,2,0,0,1,1", - "v4:16,32,8,16,16,16,8,1,2,2,0,0,1,1" + "v4:32,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:128,128,4,128,32,16,4,1,2,2,0,0,1,1", + "v4:64,128,2,32,32,32,8,1,1,2,0,0,1,1", + "v4:32,64,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:64,64,8,32,32,16,1,1,2,2,0,0,1,1", + "v4:64,64,8,32,32,32,4,1,2,2,0,0,1,1", + "v4:64,64,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:32,64,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:32,64,8,32,32,32,4,1,2,2,0,0,1,1", + "v4:128,32,4,16,16,16,8,1,2,2,0,0,1,1" }; // END_GEMM_XDL_f32_gfx90a_DEFS // BEGIN_GEMM_XDL_f32_gfx942_DEFS const StringRef PopulateParamsXDL::initParametersF32GemmGfx942[] = { - "v4:32,32,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:16,32,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:32,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:16,32,4,16,16,16,8,1,2,2,0,0,1,1", "v4:32,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,64,4,64,16,16,4,1,2,2,0,0,1,1", - "v4:32,64,8,16,32,16,4,1,2,2,0,0,1,1", - "v4:32,64,8,32,16,16,4,1,1,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,64,8,64,16,16,1,1,2,2,0,0,1,1", - "v4:256,128,4,128,64,16,4,1,2,2,0,0,1,1" + "v4:64,64,8,16,64,16,4,1,1,2,0,0,1,1", + "v4:128,128,4,64,64,16,4,1,2,2,0,0,1,1", + "v4:32,128,8,16,64,16,4,1,2,2,0,0,1,1", + "v4:64,64,8,32,64,16,4,1,2,2,0,0,1,1", + "v4:64,64,8,32,32,16,1,1,2,2,0,0,1,1", + "v4:128,64,4,128,16,16,8,1,1,2,0,0,1,1", + "v4:16,32,4,16,32,16,8,1,2,2,0,0,1,1", + "v4:256,256,4,64,128,16,4,1,2,2,0,0,1,1", + "v4:64,64,8,64,64,16,1,1,2,2,0,0,1,1", + "v4:96,128,8,96,32,32,4,1,2,2,0,0,1,1", + "v4:256,256,8,32,256,32,1,1,4,2,0,0,1,1", + "v4:32,256,8,32,16,16,4,1,1,2,0,0,1,1", + "v4:128,192,4,32,96,16,4,1,1,0,2,0,1,1" }; // END_GEMM_XDL_f32_gfx942_DEFS // BEGIN_GEMM_XDL_f32_gfx950_DEFS const StringRef PopulateParamsXDL::initParametersF32GemmGfx950[] = { - "v4:32,64,8,32,16,16,8,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,128,8,16,128,16,4,1,2,2,0,0,1,1", - "v4:16,32,8,16,16,16,4,1,2,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,16,8,16,16,16,4,1,2,2,0,0,1,1" + "v4:16,16,8,16,16,16,8,1,4,2,0,0,1,1", + "v4:64,32,4,32,16,16,8,1,4,2,0,0,1,1", + "v4:64,64,4,64,16,16,4,1,4,2,0,0,1,1", + "v4:32,32,4,16,16,16,8,1,4,2,0,0,1,1", + "v4:16,16,4,16,16,16,32,1,4,2,0,0,1,1", + "v4:128,256,4,128,64,16,4,1,2,2,0,0,1,1", + "v4:32,16,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:32,64,8,32,16,16,8,1,4,2,0,0,1,1", + "v4:64,16,8,64,16,16,1,1,1,2,0,0,1,1", + "v4:16,128,4,16,16,16,16,1,4,2,0,0,1,1" }; // END_GEMM_XDL_f32_gfx950_DEFS // BEGIN_CONV_XDL_f32_gfx908_DEFS const StringRef PopulateParamsXDL::initParametersF32ConvGfx908[] = { - "v4:32,32,8,16,16,16,4,1,1,2,0,0,1,1", - "v4:32,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,4,1,2,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,64,2,32,32,32,8,1,2,2,0,0,1,1", - "v4:64,64,8,32,32,32,4,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,32,8,16,32,16,4,1,2,2,0,0,1,1", + "v4:64,64,4,32,32,32,4,1,2,2,0,0,1,1", + "v4:64,128,4,32,32,32,4,1,2,2,0,0,1,1", + "v4:32,32,4,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,64,2,32,32,32,8,1,1,2,0,0,1,1", + "v4:64,128,4,64,32,32,4,1,1,2,0,0,1,1", + "v4:64,64,4,32,32,32,8,1,1,2,0,0,1,1", + "v4:128,128,2,128,32,32,4,1,2,2,0,0,1,1", "v4:64,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,64,4,64,16,16,4,1,1,2,0,0,1,1", - "v4:16,32,4,16,16,16,4,1,2,2,0,0,1,1", + "v4:32,64,8,16,32,16,4,1,2,2,0,0,1,1", + "v4:16,64,4,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,256,4,32,32,32,4,1,2,2,0,0,1,1", + "v4:16,32,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:16,64,4,16,16,16,4,1,2,2,0,0,1,1", + "v4:64,64,8,16,32,16,4,1,2,2,0,0,1,1", + "v4:64,32,8,16,32,16,4,1,2,2,0,0,1,1", + "v4:32,128,2,32,32,32,8,1,2,2,0,0,1,1", "v4:64,128,4,64,32,16,4,1,1,2,0,0,1,1", - "v4:128,64,4,64,32,32,4,1,2,2,0,0,1,1", - "v4:128,128,2,128,32,32,1,1,2,2,0,0,1,1", - "v4:64,32,2,64,32,32,1,1,1,2,0,0,1,1", - "v4:32,128,4,16,64,16,4,1,2,2,0,0,1,1", - "v4:64,64,2,64,32,32,1,1,2,2,0,0,1,1", - "v4:64,64,4,64,16,16,4,1,2,2,0,0,1,1" + "v4:16,128,4,16,32,16,4,1,2,2,0,0,1,1", + "v4:16,64,4,16,32,16,4,1,2,2,0,0,1,1", + "v4:64,32,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:16,64,4,16,64,16,4,1,1,2,0,0,1,1", + "v4:64,32,8,32,32,32,4,1,2,2,0,0,1,1", + "v4:16,32,8,16,32,16,4,1,2,2,0,0,1,1", + "v4:128,128,8,32,32,32,4,1,2,2,0,0,1,1", + "v4:32,128,8,32,32,16,4,1,2,2,0,0,1,1", + "v4:32,32,4,32,32,32,4,1,2,2,0,0,1,1", + "v4:64,128,4,32,32,32,8,1,2,2,0,0,1,1", + "v4:64,64,4,16,16,16,8,1,2,2,0,0,1,1", + "v4:128,64,8,32,32,32,4,1,2,2,0,0,1,1", + "v4:32,256,4,32,32,32,4,1,2,2,0,0,1,1", + "v4:64,256,8,64,64,16,1,1,2,2,0,0,1,1", + "v4:128,256,2,64,32,32,4,1,1,2,0,0,1,1", + "v4:16,256,4,16,64,16,4,1,1,2,0,0,1,1", + "v4:256,128,2,256,32,32,1,1,2,2,0,0,1,1", + "v4:32,128,4,32,64,32,4,1,1,2,0,0,1,1", + "v4:32,256,4,32,64,32,4,1,1,2,0,0,1,1", + "v4:256,128,2,128,64,32,4,1,2,2,0,0,1,1", + "v4:32,128,4,32,16,16,4,1,2,1,4,0,1,1", + "v4:128,96,2,32,96,32,4,1,2,0,2,16,1,1" }; // END_CONV_XDL_f32_gfx908_DEFS // BEGIN_CONV_XDL_f32_gfx90a_DEFS const StringRef PopulateParamsXDL::initParametersF32ConvGfx90a[] = { - "v4:32,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,32,4,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,4,1,2,2,0,0,1,1", "v4:64,64,4,32,32,32,4,1,2,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,128,2,64,32,32,4,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,128,4,32,32,32,4,1,2,2,0,0,1,1", + "v4:64,128,4,64,32,16,4,1,1,2,0,0,1,1", + "v4:96,64,4,48,16,16,4,1,2,2,0,0,1,1", + "v4:32,32,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:64,64,8,32,32,32,4,1,1,2,0,0,1,1", + "v4:32,64,8,32,16,16,4,1,1,2,0,0,1,1", + "v4:64,256,2,32,64,32,4,1,2,2,0,0,1,1", + "v4:64,64,4,32,32,16,4,1,2,2,0,0,1,1", "v4:64,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,64,8,64,16,16,4,1,1,2,0,0,1,1", - "v4:64,32,8,16,32,16,4,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,64,8,16,16,16,4,1,1,2,0,0,1,1", - "v4:32,64,4,32,16,16,4,1,1,2,0,0,1,1", - "v4:64,64,2,64,32,32,1,1,2,2,0,0,1,1", - "v4:32,16,4,32,16,16,1,1,1,2,0,0,1,1", - "v4:128,256,8,128,64,32,1,1,1,2,0,0,1,1", - "v4:32,64,2,32,32,32,4,1,1,2,0,0,1,1", - "v4:32,32,4,32,16,16,1,1,2,2,0,0,1,1" + "v4:96,128,2,96,32,32,4,1,2,2,0,0,1,1", + "v4:32,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,64,8,16,32,16,4,1,2,2,0,0,1,1", + "v4:16,32,4,16,16,16,8,1,2,2,0,0,1,1", + "v4:32,32,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:32,64,8,16,32,16,4,1,2,2,0,0,1,1", + "v4:128,128,4,32,32,32,4,1,2,2,0,0,1,1", + "v4:64,32,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:64,32,4,32,16,16,8,1,1,2,0,0,1,1", + "v4:32,64,4,16,32,16,4,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:128,256,2,128,64,32,4,1,1,2,0,0,1,1", + "v4:128,32,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:64,256,4,64,64,16,4,1,1,2,0,0,1,1", + "v4:16,64,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:16,32,4,16,32,16,4,1,1,2,0,0,1,1", + "v4:16,64,4,16,64,16,4,1,1,2,0,0,1,1", + "v4:32,128,4,16,32,16,4,1,2,2,0,0,1,1", + "v4:256,256,2,256,32,32,1,1,2,2,0,0,1,1", + "v4:128,128,4,32,32,16,4,1,2,2,0,0,1,1", + "v4:64,32,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:32,256,2,32,64,32,4,1,1,2,0,0,1,1", + "v4:32,64,2,32,64,32,4,1,1,2,0,0,1,1", + "v4:256,128,4,128,32,16,1,1,2,2,0,0,1,1", + "v4:32,256,4,32,128,16,1,1,1,2,0,0,1,1", + "v4:64,128,2,64,64,32,4,1,2,2,0,0,1,1", + "v4:256,256,8,256,64,16,1,1,1,2,0,0,1,1", + "v4:16,128,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:64,256,4,32,64,32,4,1,2,2,0,0,1,1", + "v4:192,32,4,48,16,16,8,1,2,2,0,0,1,1", + "v4:32,128,4,16,16,16,8,1,1,2,0,0,1,1", + "v4:256,128,4,128,64,16,4,1,1,2,0,0,1,1", + "v4:32,64,4,32,16,16,16,1,2,2,0,0,1,1", + "v4:16,16,4,16,16,16,8,1,2,0,8,32,1,1", + "v4:16,64,4,16,16,16,4,1,2,1,2,32,1,1", + "v4:96,128,4,96,32,16,4,1,1,0,1,64,1,1", + "v4:192,128,4,96,32,32,4,1,2,1,0,8,1,1" }; // END_CONV_XDL_f32_gfx90a_DEFS // BEGIN_CONV_XDL_f32_gfx942_DEFS const StringRef PopulateParamsXDL::initParametersF32ConvGfx942[] = { - "v4:16,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,64,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:32,16,8,16,16,16,8,1,2,2,0,0,1,1", "v4:32,32,8,16,16,16,4,1,2,2,0,0,1,1", - "v4:16,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,16,4,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,16,8,16,16,16,4,1,1,2,0,0,1,1", - "v4:64,32,8,16,32,16,4,1,1,2,0,0,1,1", - "v4:16,32,4,16,16,16,4,1,2,2,0,0,1,1", - "v4:32,64,4,16,32,16,4,1,2,2,0,0,1,1", + "v4:16,32,4,16,16,16,8,1,2,2,0,0,1,1", + "v4:16,16,4,16,16,16,8,1,2,2,0,0,1,1", + "v4:32,32,8,16,16,16,8,1,1,2,0,0,1,1", "v4:64,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,64,8,32,32,16,4,1,1,2,0,0,1,1", - "v4:64,16,4,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,32,4,16,16,16,4,1,1,2,0,0,1,1", - "v4:32,32,4,32,16,16,4,1,1,2,0,0,1,1", - "v4:32,64,8,32,16,16,1,1,1,2,0,0,1,1", - "v4:64,64,4,64,16,16,1,1,1,2,0,0,1,1", - "v4:32,64,8,32,16,16,1,1,2,2,0,0,1,1", - "v4:32,32,4,32,32,32,1,1,1,2,0,0,1,1", - "v4:32,64,8,16,32,16,8,1,1,2,0,0,1,1", - "v4:128,128,4,128,32,16,1,1,1,2,0,0,1,1" + "v4:64,32,8,32,16,16,4,1,1,2,0,0,1,1", + "v4:16,32,4,16,16,16,8,1,1,2,0,0,1,1", + "v4:64,16,8,16,16,16,4,1,4,2,0,0,1,1", + "v4:16,16,4,16,16,16,16,1,4,2,0,0,1,1", + "v4:16,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,16,4,16,16,16,4,1,2,2,0,0,1,1", + "v4:64,128,4,64,32,16,4,1,1,2,0,0,1,1", + "v4:32,64,8,16,32,16,4,1,1,2,0,0,1,1", + "v4:32,32,4,32,16,16,4,1,2,2,0,0,1,1", + "v4:16,64,4,16,32,16,4,1,2,2,0,0,1,1", + "v4:32,64,4,16,32,16,4,1,2,2,0,0,1,1", + "v4:64,64,4,32,32,16,4,1,2,2,0,0,1,1", + "v4:128,128,8,128,32,16,1,1,4,2,0,0,1,1", + "v4:128,128,4,128,16,16,4,1,1,2,0,0,1,1", + "v4:64,32,4,32,32,16,8,1,2,2,0,0,1,1", + "v4:128,64,4,64,32,16,4,1,4,2,0,0,1,1", + "v4:128,16,4,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,48,8,16,48,16,4,1,1,2,0,0,1,1", + "v4:256,64,4,128,16,16,4,1,2,2,0,0,1,1", + "v4:48,16,4,48,16,16,8,1,1,2,0,0,1,1", + "v4:64,64,8,64,32,16,1,1,4,2,0,0,1,1", + "v4:64,64,4,64,64,16,4,1,1,2,0,0,1,1", + "v4:32,128,4,32,128,16,1,1,4,2,0,0,1,1", + "v4:128,64,8,64,16,16,4,1,2,2,0,0,1,1", + "v4:192,64,4,48,32,16,4,1,2,2,0,0,1,1", + "v4:64,256,8,64,32,16,1,1,2,2,0,0,1,1", + "v4:16,32,4,16,32,16,16,1,2,2,0,0,1,1", + "v4:32,64,8,32,64,16,1,1,4,2,0,0,1,1", + "v4:64,16,8,16,16,16,8,1,3,2,0,0,1,1", + "v4:96,64,4,48,16,16,4,1,2,2,0,0,1,1", + "v4:128,128,4,128,16,16,8,1,2,2,0,0,1,1", + "v4:128,256,4,128,32,16,4,1,1,2,0,0,1,1", + "v4:256,128,4,64,64,16,4,1,2,2,0,0,1,1", + "v4:256,256,4,128,64,16,1,1,4,2,0,0,1,1", + "v4:32,256,4,32,256,16,1,1,4,2,0,0,1,1", + "v4:64,32,8,16,32,16,4,1,3,2,0,0,1,1", + "v4:128,96,4,32,48,16,4,1,1,2,0,0,1,1", + "v4:16,128,4,16,128,16,4,1,1,2,0,0,1,1", + "v4:256,128,8,64,32,16,1,1,2,2,0,0,1,1", + "v4:256,256,8,128,128,16,1,1,1,2,0,0,1,1", + "v4:64,256,4,64,16,16,4,1,2,2,0,0,1,1", + "v4:64,64,2,32,64,32,16,1,2,2,0,0,1,1", + "v4:96,64,8,48,32,16,4,1,1,2,0,0,1,1", + "v4:48,32,4,48,16,16,16,1,3,2,0,0,1,1", + "v4:64,256,4,16,64,16,8,1,4,2,0,0,1,1", + "v4:96,192,8,48,192,16,1,1,4,2,0,0,1,1", + "v4:96,64,4,48,16,16,8,1,2,0,2,64,1,1", + "v4:160,64,8,80,32,16,4,1,1,2,0,0,1,1", + "v4:192,64,4,192,16,16,4,1,2,2,0,0,1,1", + "v4:96,96,8,48,48,16,1,1,4,1,0,0,1,1", + "v4:192,256,4,96,32,16,8,1,1,1,8,8,1,1", + "v4:64,192,8,32,48,16,1,1,4,0,8,0,1,1", + "v4:64,96,4,32,48,16,4,1,1,0,8,64,1,1", + "v4:96,16,4,96,16,16,8,1,1,2,0,0,1,1" }; // END_CONV_XDL_f32_gfx942_DEFS // BEGIN_CONV_XDL_f32_gfx950_DEFS const StringRef PopulateParamsXDL::initParametersF32ConvGfx950[] = { - "v4:128,128,4,128,32,16,1,1,1,2,0,0,1,1", - "v4:32,64,8,16,32,16,8,1,1,2,0,0,1,1", - "v4:64,64,8,16,64,16,8,1,1,2,0,0,1,1", - "v4:64,32,8,16,32,16,4,1,1,2,0,0,1,1", + "v4:64,32,8,32,16,16,4,1,4,2,0,0,1,1", + "v4:32,32,4,16,16,16,8,1,4,2,0,0,1,1", + "v4:16,16,4,16,16,16,8,1,4,2,0,0,1,1", + "v4:32,16,8,16,16,16,16,1,4,2,0,0,1,1", + "v4:64,16,4,16,16,16,16,1,4,2,0,0,1,1", + "v4:16,16,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:128,16,8,32,16,16,4,1,4,2,0,0,1,1", + "v4:64,32,4,32,16,16,8,1,2,2,0,0,1,1", + "v4:16,32,4,16,16,16,8,1,2,2,0,0,1,1", + "v4:32,16,8,16,16,16,4,1,2,2,0,0,1,1", "v4:32,32,8,16,16,16,4,1,2,2,0,0,1,1", "v4:64,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:128,32,4,64,16,16,8,1,1,2,0,0,1,1", - "v4:16,32,4,16,16,16,4,1,2,2,0,0,1,1", - "v4:256,32,4,128,16,16,4,1,1,2,0,0,1,1", - "v4:128,32,8,32,32,16,4,1,1,2,0,0,1,1", - "v4:16,16,8,16,16,16,1,1,1,2,0,0,1,1", - "v4:16,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,4,1,2,2,0,0,1,1", - "v4:64,64,4,64,16,16,4,1,2,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,4,64,16,16,1,1,1,2,0,0,1,1", - "v4:128,64,4,128,16,16,4,1,1,2,0,0,1,1", - "v4:64,64,4,32,32,16,8,1,1,2,0,0,1,1", - "v4:64,32,4,16,32,16,4,1,2,2,0,0,1,1", - "v4:16,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:128,128,4,128,32,16,1,1,2,2,0,0,1,1", - "v4:32,32,4,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,32,2,32,32,32,1,1,2,2,0,0,1,1", - "v4:32,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:128,32,4,32,32,32,8,1,2,2,0,0,1,1", - "v4:32,64,8,32,16,16,8,1,1,2,0,0,1,1", - "v4:128,256,2,128,64,32,4,1,2,2,0,0,1,1", + "v4:16,32,8,16,32,16,4,1,4,2,0,0,1,1", + "v4:32,16,8,16,16,16,16,1,3,2,0,0,1,1", + "v4:64,64,8,16,32,16,4,1,2,2,0,0,1,1", + "v4:64,64,4,32,32,16,4,1,1,2,0,0,1,1", "v4:64,128,4,64,32,16,4,1,1,2,0,0,1,1", - "v4:32,32,8,16,16,16,4,1,1,2,0,0,1,1", - "v4:64,64,2,64,32,32,1,1,2,2,0,0,1,1", - "v4:64,32,8,32,16,16,4,1,2,2,0,0,1,1" + "v4:64,16,4,64,16,16,8,1,4,2,0,0,1,1", + "v4:128,32,4,32,32,16,8,1,4,2,0,0,1,1", + "v4:128,128,4,128,128,16,1,1,4,2,0,0,1,1", + "v4:32,64,4,16,32,16,4,1,2,2,0,0,1,1", + "v4:256,64,4,256,16,16,4,1,4,2,0,0,1,1", + "v4:128,64,8,128,64,16,1,1,4,2,0,0,1,1", + "v4:64,128,4,64,16,16,4,1,2,2,0,0,1,1", + "v4:128,64,4,64,32,16,8,1,1,2,0,0,1,1", + "v4:128,128,4,128,32,16,4,1,2,2,0,0,1,1", + "v4:32,64,4,32,64,16,4,1,1,2,0,0,1,1", + "v4:64,32,4,64,16,16,4,1,2,2,0,0,1,1", + "v4:64,64,8,32,32,16,4,1,4,2,0,0,1,1", + "v4:64,32,4,64,16,16,4,1,4,2,0,0,1,1", + "v4:128,64,4,128,64,16,4,1,4,2,0,0,1,1", + "v4:32,128,4,32,16,16,4,1,2,2,0,0,1,1", + "v4:32,128,2,32,32,32,4,1,2,2,0,0,1,1", + "v4:32,32,4,32,32,32,8,1,2,2,0,0,1,1", + "v4:64,256,4,64,64,32,1,1,4,2,0,0,1,1", + "v4:128,256,4,128,128,16,1,1,4,2,0,0,1,1", + "v4:32,256,4,32,64,16,4,1,3,2,0,0,1,1", + "v4:64,32,4,64,32,16,8,1,4,2,0,0,1,1", + "v4:192,64,4,96,32,16,8,1,4,2,0,0,1,1", + "v4:48,16,4,48,16,16,8,1,1,2,0,0,1,1", + "v4:256,256,8,256,16,16,1,1,2,2,0,0,1,1", + "v4:256,32,4,64,32,16,4,1,1,2,0,0,1,1", + "v4:64,32,4,32,32,32,4,1,2,2,0,0,1,1", + "v4:64,256,4,64,32,32,4,1,3,2,0,0,1,1", + "v4:128,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:128,16,8,64,16,16,4,1,1,2,0,0,1,1", + "v4:128,256,4,128,16,16,8,1,1,2,0,0,1,1", + "v4:160,80,8,80,80,16,4,1,4,2,0,0,1,1", + "v4:192,64,8,48,32,16,4,1,1,2,0,0,1,1", + "v4:128,160,8,32,160,32,4,1,1,2,0,0,1,1", + "v4:16,16,4,16,16,16,4,1,2,0,1,4,1,1", + "v4:128,48,8,16,48,16,4,1,2,2,0,0,1,1", + "v4:96,16,8,96,16,16,4,1,4,2,0,0,1,1", + "v4:96,64,4,48,16,16,4,1,4,1,0,8,1,1", + "v4:16,64,4,16,32,16,4,1,2,0,1,64,1,1", + "v4:192,32,8,48,32,16,4,1,4,1,4,64,1,1", + "v4:192,32,8,96,32,16,4,1,4,1,1,32,1,1", + "v4:48,16,4,48,16,16,4,1,4,0,8,4,1,1", + "v4:64,16,4,16,16,16,16,1,4,1,8,0,1,1", + "v4:80,16,8,80,16,16,4,1,4,0,4,32,1,1" }; // END_CONV_XDL_f32_gfx950_DEFS // BEGIN_GEMM_XDL_f16_gfx908_DEFS const StringRef PopulateParamsXDL::initParametersF16GemmGfx908[] = { - "v4:32,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:32,32,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,64,8,16,16,16,16,1,2,2,0,0,1,1", "v4:64,64,8,32,32,32,8,1,2,2,0,0,1,1", - "v4:32,64,8,32,16,16,8,1,1,2,0,0,1,1", - "v4:128,128,8,32,128,16,4,1,2,2,0,0,1,1", - "v4:64,128,4,32,64,32,8,1,2,2,0,0,1,1", - "v4:16,64,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,64,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:32,128,8,32,32,32,8,1,1,2,0,0,1,1", - "v4:128,64,4,64,32,32,8,1,1,2,0,0,1,1", - "v4:128,128,8,32,128,32,8,1,2,2,0,0,1,1", - "v4:128,128,8,32,128,32,4,1,1,2,0,0,1,1", - "v4:128,256,8,128,64,16,4,1,2,2,0,0,1,1", - "v4:128,256,8,64,128,32,4,1,1,2,0,0,1,1", - "v4:128,64,8,32,64,16,8,1,2,2,0,0,1,1", - "v4:64,128,4,64,32,32,8,1,1,2,0,0,1,1", - "v4:256,128,4,128,64,16,8,1,1,2,0,0,1,1" + "v4:64,128,8,32,32,32,8,1,1,2,0,0,1,1", + "v4:128,128,8,64,32,32,8,1,2,2,0,0,1,1", + "v4:64,128,8,32,32,32,8,1,2,2,0,0,1,1", + "v4:128,128,8,64,64,32,4,1,1,2,0,0,1,1", + "v4:128,256,8,32,64,32,8,1,1,2,0,0,1,1", + "v4:32,128,8,32,16,16,16,1,1,2,0,0,1,1", + "v4:128,128,8,128,16,16,8,1,1,2,0,0,1,1", + "v4:128,128,8,128,32,32,8,1,1,2,0,0,1,1", + "v4:256,256,8,256,16,16,8,1,1,2,0,0,1,1", + "v4:256,256,4,32,128,32,16,1,1,2,0,0,1,1", + "v4:64,32,8,32,32,16,16,1,2,2,0,0,1,1", + "v4:256,192,8,32,96,32,8,1,1,2,0,0,1,1", + "v4:128,128,4,64,64,32,8,1,1,0,1,64,1,1", + "v4:128,64,8,32,64,32,8,1,2,0,8,16,1,1", + "v4:160,32,4,80,16,16,16,1,1,1,1,0,1,1", + "v4:256,256,4,128,32,32,8,1,1,0,2,0,1,1", + "v4:32,16,4,16,16,16,16,1,2,0,0,16,1,1", + "v4:80,128,8,80,16,16,8,1,2,1,1,32,1,1" }; // END_GEMM_XDL_f16_gfx908_DEFS // BEGIN_GEMM_XDL_f16_gfx90a_DEFS const StringRef PopulateParamsXDL::initParametersF16GemmGfx90a[] = { + "v4:16,64,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,64,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,16,1,2,2,0,0,1,1", "v4:64,64,8,32,32,32,8,1,1,2,0,0,1,1", - "v4:32,64,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,64,32,32,4,1,1,2,0,0,1,1", "v4:64,64,8,32,32,32,8,1,2,2,0,0,1,1", - "v4:32,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:128,128,4,64,64,32,8,1,1,2,0,0,1,1", - "v4:64,128,8,64,32,16,4,1,1,2,0,0,1,1", - "v4:16,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,16,8,16,16,16,4,1,1,2,0,0,1,1", - "v4:32,32,8,16,16,16,4,1,1,2,0,0,1,1", - "v4:32,128,4,32,32,16,8,1,1,2,0,0,1,1", - "v4:16,64,4,16,16,16,8,1,2,2,0,0,1,1", - "v4:128,128,8,64,64,32,8,1,2,2,0,0,1,1", - "v4:128,256,8,128,64,16,4,1,2,2,0,0,1,1", - "v4:64,256,4,64,64,16,4,1,1,2,0,0,1,1", - "v4:256,128,4,128,64,16,8,1,1,2,0,0,1,1" + "v4:128,128,8,128,32,16,4,1,1,2,0,0,1,1", + "v4:32,64,8,16,32,16,16,1,1,2,0,0,1,1", + "v4:32,32,8,32,16,16,16,1,2,2,0,0,1,1", + "v4:128,128,8,64,32,32,8,1,1,2,0,0,1,1", + "v4:128,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,16,64,16,8,1,2,2,0,0,1,1", + "v4:32,64,8,32,32,32,8,1,2,2,0,0,1,1", + "v4:128,128,8,32,32,32,16,1,1,2,0,0,1,1", + "v4:128,32,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:256,256,8,64,64,16,8,1,1,2,0,0,1,1", + "v4:128,256,8,128,16,16,8,1,1,2,0,0,1,1", + "v4:128,64,8,64,64,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,64,32,32,4,1,1,1,8,16,1,1", + "v4:80,128,8,80,16,16,8,1,2,1,0,64,1,1", + "v4:256,256,4,256,64,16,4,1,1,0,0,0,1,1", + "v4:32,256,8,32,32,16,4,1,1,0,0,0,1,1", + "v4:64,256,8,64,32,32,4,1,1,0,2,16,1,1", + "v4:96,256,8,96,32,16,4,1,2,2,0,0,1,1" }; // END_GEMM_XDL_f16_gfx90a_DEFS // BEGIN_GEMM_XDL_f16_gfx942_DEFS const StringRef PopulateParamsXDL::initParametersF16GemmGfx942[] = { - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,64,8,16,32,16,8,1,1,2,0,0,1,1", - "v4:32,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,64,8,16,64,16,8,1,1,2,0,0,1,1", - "v4:32,32,8,32,16,16,8,1,2,2,0,0,1,1", - "v4:64,128,4,32,64,32,8,1,1,2,0,0,1,1", - "v4:64,32,8,32,32,32,8,1,1,2,0,0,1,1", - "v4:128,128,8,64,64,16,8,1,2,2,0,0,1,1", - "v4:256,128,4,128,64,16,8,1,1,2,0,0,1,1", - "v4:64,128,4,64,32,32,8,1,1,2,0,0,1,1", - "v4:128,128,2,64,64,32,8,1,1,2,0,0,1,1", - "v4:128,256,8,64,128,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,32,16,16,8,1,2,2,0,0,1,1", - "v4:64,16,4,16,16,16,4,1,2,2,0,0,1,1", - "v4:256,32,2,128,32,32,4,1,1,2,0,0,1,1", - "v4:16,32,4,16,16,16,4,1,1,2,0,0,1,1", - "v4:64,32,4,64,16,16,8,1,2,2,0,0,1,1", - "v4:128,256,8,64,128,32,4,1,2,2,0,0,1,1", - "v4:16,32,8,16,16,16,4,1,2,2,0,0,1,1" + "v4:32,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:16,32,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:32,64,8,16,32,16,8,1,2,2,0,0,1,1", + "v4:64,64,8,32,32,16,8,1,2,2,0,0,1,1", + "v4:16,64,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,32,8,32,32,32,8,1,2,2,0,0,1,1", + "v4:32,64,8,32,32,32,16,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,8,1,4,2,0,0,1,1", + "v4:128,64,8,64,32,32,8,1,2,2,0,0,1,1", + "v4:64,64,4,32,32,16,16,1,2,2,0,0,1,1", + "v4:128,128,8,32,64,16,8,1,2,2,0,0,1,1", + "v4:128,128,4,64,64,16,8,1,1,2,0,0,1,1", + "v4:64,64,8,64,64,16,8,1,1,2,0,0,1,1", + "v4:16,256,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:128,64,8,32,32,32,8,1,2,2,0,0,1,1", + "v4:256,256,8,32,128,16,8,1,1,2,0,0,1,1", + "v4:128,128,8,64,128,16,8,1,1,2,0,0,1,1", + "v4:128,256,8,64,128,32,4,1,1,2,0,0,1,1", + "v4:256,256,4,64,128,32,8,1,2,2,0,0,1,1", + "v4:128,128,8,32,32,32,8,1,2,2,0,0,1,1", + "v4:128,256,4,64,64,16,8,1,1,2,0,0,1,1", + "v4:64,32,4,64,32,16,16,1,3,2,0,0,1,1", + "v4:96,128,8,96,32,32,4,1,2,2,0,0,1,1", + "v4:192,256,8,192,32,16,4,1,2,2,0,0,1,1", + "v4:256,256,8,32,128,32,8,1,1,1,2,32,1,1", + "v4:128,128,8,64,64,32,4,1,1,0,1,64,1,1", + "v4:192,192,4,96,192,16,8,1,1,1,1,16,1,1" }; // END_GEMM_XDL_f16_gfx942_DEFS // BEGIN_GEMM_XDL_f16_gfx950_DEFS const StringRef PopulateParamsXDL::initParametersF16GemmGfx950[] = { - "v4:32,64,8,16,32,16,8,1,2,2,0,0,1,1", - "v4:16,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:32,64,8,32,16,16,8,1,2,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,64,8,32,32,32,8,1,1,2,0,0,1,1", - "v4:64,64,8,32,32,32,8,1,2,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:32,16,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:16,32,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,32,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,64,8,32,32,32,16,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,32,1,4,2,0,0,1,1", + "v4:32,32,4,32,16,16,16,1,4,2,0,0,1,1", "v4:128,64,8,32,64,16,8,1,2,2,0,0,1,1", - "v4:128,128,8,64,64,16,8,1,1,2,0,0,1,1", - "v4:256,128,4,128,64,32,8,1,2,2,0,0,1,1", - "v4:32,128,4,16,64,16,4,1,2,2,0,0,1,1", - "v4:32,128,8,32,32,32,8,1,1,2,0,0,1,1" + "v4:32,64,4,32,16,16,16,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,16,1,3,2,0,0,1,1", + "v4:128,128,8,32,64,32,8,1,2,2,0,0,1,1", + "v4:64,128,8,32,32,32,8,1,2,2,0,0,1,1", + "v4:16,16,4,16,16,16,16,1,3,2,0,0,1,1", + "v4:64,64,8,16,32,16,16,1,1,2,0,0,1,1", + "v4:64,16,4,32,16,16,16,1,2,2,0,0,1,1", + "v4:64,64,4,64,32,32,8,1,4,2,0,0,1,1", + "v4:32,64,4,32,32,32,8,1,4,2,0,0,1,1", + "v4:32,64,8,32,16,16,8,1,4,2,0,0,1,1", + "v4:16,64,4,16,16,16,32,1,4,2,0,0,1,1", + "v4:32,128,4,32,64,16,16,1,2,2,0,0,1,1", + "v4:256,128,8,64,64,32,8,1,2,2,0,0,1,1", + "v4:128,128,4,128,64,16,8,1,4,2,0,0,1,1", + "v4:128,256,2,128,64,32,8,1,4,2,0,0,1,1", + "v4:256,128,4,256,32,16,8,1,4,2,0,0,1,1", + "v4:128,128,4,128,64,16,4,1,2,2,0,0,1,1", + "v4:256,128,8,64,128,16,8,1,1,2,0,0,1,1", + "v4:256,256,4,64,64,32,8,1,2,2,0,0,1,1", + "v4:256,256,2,128,64,32,16,1,2,2,0,0,1,1", + "v4:128,256,8,128,32,32,8,1,1,2,0,0,1,1", + "v4:96,48,8,48,48,16,8,1,4,1,2,4,1,1" }; // END_GEMM_XDL_f16_gfx950_DEFS // BEGIN_CONV_XDL_f16_gfx908_DEFS const StringRef PopulateParamsXDL::initParametersF16ConvGfx908[] = { - "v4:32,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,32,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:64,32,4,32,16,16,16,1,2,2,0,0,1,1", + "v4:64,16,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,64,8,32,32,32,8,1,1,2,0,0,1,1", + "v4:128,128,8,128,32,32,4,1,1,2,0,0,1,1", + "v4:64,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,64,4,32,32,32,16,1,2,2,0,0,1,1", + "v4:128,256,2,128,64,32,4,1,1,2,0,0,1,1", + "v4:64,128,8,64,32,32,4,1,1,2,0,0,1,1", + "v4:16,32,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,64,8,16,16,16,16,1,2,2,0,0,1,1", "v4:64,64,4,32,32,32,8,1,2,2,0,0,1,1", - "v4:64,128,8,64,32,32,8,1,2,2,0,0,1,1", - "v4:64,128,8,32,64,32,4,1,1,2,0,0,1,1", - "v4:32,128,8,32,32,32,8,1,2,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,64,8,16,64,16,8,1,1,2,0,0,1,1", - "v4:128,128,4,64,64,32,8,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,64,4,16,16,16,8,1,1,2,0,0,1,1", + "v4:16,32,4,16,32,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,32,32,32,8,1,2,2,0,0,1,1", + "v4:128,128,4,128,32,32,8,1,1,2,0,0,1,1", + "v4:32,128,8,32,32,32,4,1,2,2,0,0,1,1", + "v4:128,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:16,16,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,32,4,32,16,16,8,1,2,2,0,0,1,1", "v4:32,256,2,32,64,32,4,1,2,2,0,0,1,1", - "v4:128,256,2,128,64,32,4,1,1,2,0,0,1,1", - "v4:64,32,2,64,32,32,4,1,1,2,0,0,1,1" + "v4:128,128,2,64,32,32,8,1,2,2,0,0,1,1", + "v4:32,32,8,32,32,16,8,1,2,2,0,0,1,1", + "v4:128,128,2,64,64,32,16,1,1,2,0,0,1,1", + "v4:32,64,4,32,16,16,8,1,2,2,0,0,1,1", + "v4:64,64,4,32,16,16,4,1,2,2,0,0,1,1", + "v4:128,128,8,32,64,32,8,1,1,2,0,0,1,1", + "v4:128,32,8,32,32,32,8,1,2,2,0,0,1,1", + "v4:128,64,8,64,32,32,4,1,2,2,0,0,1,1", + "v4:64,64,4,64,32,32,8,1,1,2,0,0,1,1", + "v4:128,128,4,64,32,32,4,1,1,2,0,0,1,1", + "v4:128,32,4,32,32,32,16,1,1,2,0,0,1,1", + "v4:16,64,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:128,128,2,32,32,32,4,1,2,2,0,0,1,1", + "v4:128,256,4,64,32,32,16,1,1,2,0,0,1,1", + "v4:128,48,8,16,48,16,4,1,2,2,0,0,1,1", + "v4:256,64,4,128,32,32,8,1,1,2,0,0,1,1", + "v4:64,64,4,16,16,16,4,1,2,2,0,0,1,1", + "v4:256,256,2,64,64,32,16,1,2,2,0,0,1,1", + "v4:128,256,8,128,64,32,4,1,1,2,0,0,1,1", + "v4:64,32,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:256,64,4,64,32,32,8,1,2,2,0,0,1,1", + "v4:64,256,2,64,32,32,16,1,2,2,0,0,1,1", + "v4:128,64,2,128,32,32,16,1,1,2,0,0,1,1", + "v4:128,32,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:256,64,8,32,64,32,8,1,1,2,0,0,1,1", + "v4:256,32,8,16,32,16,4,1,2,2,0,0,1,1", + "v4:192,64,4,96,32,16,8,1,2,2,0,0,1,1", + "v4:64,32,4,64,32,32,16,1,1,2,0,0,1,1", + "v4:128,128,8,32,32,32,8,1,2,2,0,0,1,1", + "v4:256,256,8,64,64,32,8,1,1,2,0,0,1,1", + "v4:256,128,8,32,64,32,8,1,1,2,0,0,1,1", + "v4:192,256,2,96,32,32,16,1,2,2,0,0,1,1", + "v4:160,64,4,160,32,32,8,1,2,1,8,16,1,1", + "v4:96,128,4,48,32,16,16,1,2,2,0,0,1,1", + "v4:16,32,4,16,32,16,8,1,2,1,1,4,1,1", + "v4:128,256,4,64,256,32,8,1,1,0,0,0,1,1", + "v4:192,64,8,96,16,16,8,1,1,1,8,32,1,1", + "v4:256,192,4,32,96,32,4,1,2,0,4,0,1,1", + "v4:32,16,4,16,16,16,8,1,2,1,1,8,1,1" }; // END_CONV_XDL_f16_gfx908_DEFS // BEGIN_CONV_XDL_f16_gfx90a_DEFS const StringRef PopulateParamsXDL::initParametersF16ConvGfx90a[] = { - "v4:32,32,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,32,32,32,8,1,1,2,0,0,1,1", "v4:64,64,8,32,32,32,8,1,1,2,0,0,1,1", - "v4:64,64,8,32,32,32,8,1,2,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,32,8,32,32,32,8,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:128,128,4,128,32,32,8,1,2,2,0,0,1,1", - "v4:64,64,2,32,32,32,8,1,1,2,0,0,1,1", - "v4:64,32,8,32,32,32,8,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,64,4,16,16,16,4,1,1,2,0,0,1,1", - "v4:32,128,8,32,32,32,4,1,1,2,0,0,1,1", - "v4:16,128,4,16,32,16,4,1,1,2,0,0,1,1", + "v4:64,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:128,256,2,128,64,32,4,1,1,2,0,0,1,1", + "v4:64,128,8,64,32,32,4,1,1,2,0,0,1,1", "v4:64,256,2,64,64,32,4,1,2,2,0,0,1,1", - "v4:128,64,2,128,32,32,4,1,1,2,0,0,1,1" + "v4:128,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:128,128,4,128,32,16,8,1,1,2,0,0,1,1", + "v4:64,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:32,256,2,32,64,32,4,1,2,2,0,0,1,1", + "v4:64,256,8,32,64,32,4,1,1,2,0,0,1,1", + "v4:32,32,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:128,256,2,64,128,32,4,1,2,2,0,0,1,1", + "v4:32,32,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:128,64,4,32,64,16,8,1,1,2,0,0,1,1", + "v4:128,16,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:128,256,8,128,32,32,4,1,1,2,0,0,1,1", + "v4:16,32,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:16,32,4,16,32,16,8,1,1,2,0,0,1,1", + "v4:64,32,4,32,16,16,16,1,2,2,0,0,1,1", + "v4:64,64,4,32,32,32,8,1,2,2,0,0,1,1", + "v4:128,16,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:64,32,8,32,16,16,16,1,1,2,0,0,1,1", + "v4:32,64,4,32,32,32,8,1,2,2,0,0,1,1", + "v4:256,128,8,128,32,16,4,1,1,2,0,0,1,1", + "v4:128,128,4,64,64,32,8,1,1,2,0,0,1,1", + "v4:128,64,4,32,64,32,8,1,2,2,0,0,1,1", + "v4:32,16,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,16,4,32,16,16,8,1,1,2,0,0,1,1", + "v4:128,128,2,32,32,32,4,1,2,2,0,0,1,1", + "v4:128,32,8,32,32,32,8,1,1,2,0,0,1,1", + "v4:128,32,8,32,32,16,8,1,2,2,0,0,1,1", + "v4:16,32,4,16,16,16,8,1,2,2,0,0,1,1", + "v4:256,64,4,128,32,32,8,1,1,2,0,0,1,1", + "v4:128,256,4,128,64,32,4,1,1,2,0,0,1,1", + "v4:64,256,4,32,64,16,8,1,1,2,0,0,1,1", + "v4:16,32,4,16,32,16,16,1,1,2,0,0,1,1", + "v4:32,32,8,16,16,16,4,1,1,2,0,0,1,1", + "v4:64,64,2,32,64,32,4,1,2,2,0,0,1,1", + "v4:64,64,4,16,16,16,4,1,2,2,0,0,1,1", + "v4:32,64,4,16,32,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,32,32,32,4,1,2,2,0,0,1,1", + "v4:128,256,4,64,128,32,8,1,1,2,0,0,1,1", + "v4:128,64,8,32,32,16,8,1,1,2,0,0,1,1", + "v4:128,128,8,128,16,16,4,1,2,2,0,0,1,1", + "v4:64,64,4,32,64,16,8,1,1,2,0,0,1,1", + "v4:128,64,8,32,32,32,8,1,1,2,0,0,1,1", + "v4:128,128,8,32,64,16,4,1,1,2,0,0,1,1", + "v4:128,64,4,64,64,32,8,1,2,2,0,0,1,1", + "v4:128,256,4,32,128,32,8,1,1,2,0,0,1,1", + "v4:128,256,4,64,128,16,4,1,1,2,0,0,1,1", + "v4:192,32,4,48,32,16,16,1,1,2,0,0,1,1", + "v4:256,128,4,32,64,32,16,1,1,2,0,0,1,1", + "v4:128,32,4,16,32,16,16,1,2,2,0,0,1,1", + "v4:128,32,8,32,32,32,4,1,2,2,0,0,1,1", + "v4:256,32,4,16,32,16,8,1,2,2,0,0,1,1", + "v4:256,256,4,64,64,32,16,1,1,2,0,0,1,1", + "v4:128,64,4,32,64,32,16,1,2,2,0,0,1,1", + "v4:256,128,8,64,128,32,4,1,2,2,0,0,1,1", + "v4:64,128,8,16,64,16,4,1,1,2,0,0,1,1", + "v4:64,32,8,64,32,32,4,1,2,2,0,0,1,1", + "v4:128,128,4,32,128,32,16,1,1,2,0,0,1,1", + "v4:96,32,4,96,16,16,16,1,2,2,0,0,1,1", + "v4:64,256,4,64,16,16,16,1,1,2,0,0,1,1", + "v4:64,32,4,64,32,16,16,1,1,2,0,0,1,1", + "v4:128,256,4,32,256,16,4,1,1,2,0,0,1,1", + "v4:192,64,8,96,32,16,4,1,2,2,0,0,1,1", + "v4:256,48,8,16,48,16,8,1,1,1,1,4,1,1", + "v4:256,128,8,128,32,32,4,1,1,1,8,4,1,1", + "v4:256,192,8,64,96,32,8,1,1,0,1,64,1,1", + "v4:96,64,8,96,32,32,4,1,1,1,0,64,1,1" }; // END_CONV_XDL_f16_gfx90a_DEFS // BEGIN_CONV_XDL_f16_gfx942_DEFS const StringRef PopulateParamsXDL::initParametersF16ConvGfx942[] = { - "v4:16,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:32,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,16,8,32,16,16,8,1,2,2,0,0,1,1", "v4:64,64,8,32,32,32,8,1,2,2,0,0,1,1", - "v4:64,64,8,32,32,32,8,1,1,2,0,0,1,1", - "v4:16,64,8,16,16,16,4,1,1,2,0,0,1,1", - "v4:64,128,4,32,64,32,8,1,2,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,32,4,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,64,8,32,16,16,4,1,1,2,0,0,1,1", - "v4:128,256,4,128,64,32,8,1,2,2,0,0,1,1", - "v4:64,64,8,16,64,16,4,1,1,2,0,0,1,1", - "v4:16,64,4,16,16,16,4,1,1,2,0,0,1,1", - "v4:64,32,4,32,32,32,8,1,2,2,0,0,1,1", - "v4:32,128,8,32,32,32,1,1,2,2,0,0,1,1", - "v4:128,64,2,64,32,32,8,1,2,2,0,0,1,1", - "v4:64,32,4,16,32,16,4,1,1,2,0,0,1,1", + "v4:128,16,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:64,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:16,16,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,64,8,32,32,32,4,1,2,2,0,0,1,1", + "v4:16,32,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,16,8,32,16,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,64,32,16,4,1,2,2,0,0,1,1", "v4:32,256,2,32,64,32,4,1,2,2,0,0,1,1", - "v4:64,32,2,64,32,32,4,1,1,2,0,0,1,1" + "v4:16,128,4,16,32,16,8,1,2,2,0,0,1,1", + "v4:16,32,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:128,64,8,64,32,16,4,1,1,2,0,0,1,1", + "v4:16,16,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:128,128,2,128,64,32,4,1,2,2,0,0,1,1", + "v4:128,32,8,32,32,32,8,1,2,2,0,0,1,1", + "v4:32,64,8,32,16,16,4,1,2,2,0,0,1,1", + "v4:64,64,4,32,32,32,8,1,2,2,0,0,1,1", + "v4:32,32,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,16,64,16,8,1,2,2,0,0,1,1", + "v4:128,64,8,32,32,32,8,1,2,2,0,0,1,1", + "v4:128,16,8,64,16,16,8,1,1,2,0,0,1,1", + "v4:128,64,4,64,64,16,8,1,1,2,0,0,1,1", + "v4:64,32,2,64,32,32,16,1,1,2,0,0,1,1", + "v4:128,32,2,32,32,32,4,1,2,2,0,0,1,1", + "v4:128,32,4,128,16,16,4,1,2,2,0,0,1,1", + "v4:128,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:128,128,4,64,64,16,8,1,2,2,0,0,1,1", + "v4:48,32,8,48,16,16,4,1,2,2,0,0,1,1", + "v4:64,64,2,64,64,32,16,1,2,2,0,0,1,1", + "v4:64,128,4,64,128,16,4,1,1,2,0,0,1,1", + "v4:256,256,4,64,128,16,8,1,2,2,0,0,1,1", + "v4:64,256,4,64,64,32,4,1,1,2,0,0,1,1", + "v4:128,128,8,32,64,32,1,1,2,2,0,0,1,1", + "v4:32,128,4,32,128,16,4,1,1,2,0,0,1,1", + "v4:256,32,8,32,16,16,4,1,2,2,0,0,1,1", + "v4:32,256,4,32,32,16,8,1,1,2,0,0,1,1", + "v4:128,256,4,128,32,16,8,1,1,2,0,0,1,1", + "v4:128,256,8,128,32,16,4,1,2,2,0,0,1,1", + "v4:64,64,4,16,16,16,4,1,2,2,0,0,1,1", + "v4:128,256,4,128,64,16,8,1,1,2,0,0,1,1", + "v4:256,128,4,64,64,32,8,1,2,2,0,0,1,1", + "v4:256,128,8,128,32,16,4,1,1,2,0,0,1,1", + "v4:256,64,4,64,64,16,8,1,2,2,0,0,1,1", + "v4:32,64,4,16,16,16,4,1,2,2,0,0,1,1", + "v4:128,128,2,128,128,32,8,1,2,2,0,0,1,1", + "v4:128,16,8,16,16,16,4,1,1,2,0,0,1,1", + "v4:128,256,8,128,128,32,1,1,2,2,0,0,1,1", + "v4:128,64,8,64,64,16,8,1,1,2,0,0,1,1", + "v4:64,256,8,16,128,16,4,1,1,2,0,0,1,1", + "v4:32,128,8,32,32,32,4,1,3,2,0,0,1,1", + "v4:64,128,8,64,128,16,4,1,1,2,0,0,1,1", + "v4:128,16,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:128,256,8,64,128,16,4,1,1,2,0,0,1,1", + "v4:192,32,4,48,32,16,16,1,1,2,0,0,1,1", + "v4:256,64,4,256,64,16,4,1,1,2,0,0,1,1", + "v4:96,16,8,48,16,16,16,1,2,2,0,0,1,1", + "v4:256,256,4,128,64,16,4,1,2,2,0,0,1,1", + "v4:256,256,4,64,64,32,4,1,1,2,0,0,1,1", + "v4:32,64,4,32,64,32,16,1,3,2,0,0,1,1", + "v4:128,64,4,128,16,16,16,1,1,2,0,0,1,1", + "v4:256,64,2,256,64,32,16,1,1,2,0,0,1,1", + "v4:128,256,2,128,64,32,8,1,4,2,0,0,1,1", + "v4:256,32,4,64,32,16,8,1,1,2,0,0,1,1", + "v4:128,256,2,32,256,32,16,1,1,2,0,0,1,1", + "v4:256,128,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:16,16,4,16,16,16,8,1,2,0,0,4,1,1", + "v4:16,16,8,16,16,16,8,1,2,1,0,4,1,1", + "v4:160,64,4,160,32,32,8,1,2,2,0,0,1,1", + "v4:256,48,8,16,48,16,8,1,1,2,0,0,1,1", + "v4:160,64,8,80,32,16,8,1,1,0,0,4,1,1", + "v4:96,128,8,48,32,16,8,1,2,1,0,8,1,1", + "v4:96,64,4,48,16,16,16,1,2,0,8,4,1,1", + "v4:128,128,4,128,32,16,8,1,2,0,8,4,1,1", + "v4:128,256,2,32,256,32,4,1,1,0,0,64,1,1", + "v4:128,256,4,32,256,32,8,1,3,2,0,0,1,1", + "v4:192,32,8,48,32,16,8,1,1,0,8,16,1,1", + "v4:224,32,4,224,32,32,16,1,3,2,0,0,1,1", + "v4:256,256,2,32,128,32,16,1,2,2,0,0,1,1", + "v4:256,256,4,128,64,32,8,1,2,0,2,8,1,1", + "v4:32,64,8,32,32,16,16,1,3,2,0,0,1,1", + "v4:32,64,8,32,64,32,16,1,4,2,0,0,1,1", + "v4:64,192,4,16,192,16,8,1,2,2,0,0,1,1", + "v4:64,64,4,16,16,16,32,1,3,2,0,0,1,1" }; // END_CONV_XDL_f16_gfx942_DEFS // BEGIN_CONV_XDL_f16_gfx950_DEFS const StringRef PopulateParamsXDL::initParametersF16ConvGfx950[] = { - "v4:64,128,8,32,64,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,32,16,16,8,1,2,2,0,0,1,1", - "v4:32,64,8,32,32,32,1,1,1,2,0,0,1,1", - "v4:128,128,8,64,64,32,8,1,2,2,0,0,1,1", - "v4:32,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,64,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,64,8,32,16,16,4,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,16,8,32,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,256,8,16,256,16,4,1,2,2,0,0,1,1", - "v4:128,128,4,128,32,16,8,1,1,2,0,0,1,1", - "v4:64,256,2,64,64,32,4,1,1,2,0,0,1,1", - "v4:32,64,8,32,16,16,8,1,2,2,0,0,1,1", - "v4:64,256,2,32,128,32,4,1,1,2,0,0,1,1", - "v4:64,64,8,64,16,16,8,1,2,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", "v4:64,64,8,32,32,32,8,1,2,2,0,0,1,1", - "v4:64,64,4,32,32,32,8,1,2,2,0,0,1,1", + "v4:64,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:128,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:32,16,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,32,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:16,32,4,16,16,16,16,1,1,2,0,0,1,1", + "v4:64,64,4,32,64,16,16,1,2,2,0,0,1,1", + "v4:256,16,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,64,4,32,32,32,16,1,1,2,0,0,1,1", + "v4:16,32,8,16,32,16,4,1,1,2,0,0,1,1", + "v4:16,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:64,64,2,64,64,32,4,1,1,2,0,0,1,1", + "v4:64,32,4,64,32,16,8,1,1,2,0,0,1,1", + "v4:16,64,8,16,32,16,4,1,2,2,0,0,1,1", + "v4:32,256,2,32,64,32,4,1,1,2,0,0,1,1", + "v4:128,64,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:32,32,4,32,16,16,8,1,2,2,0,0,1,1", + "v4:128,128,4,64,64,16,8,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,4,1,2,2,0,0,1,1", + "v4:32,128,4,32,64,32,8,1,2,2,0,0,1,1", + "v4:64,16,4,64,16,16,16,1,1,2,0,0,1,1", + "v4:64,64,8,64,32,16,4,1,2,2,0,0,1,1", + "v4:64,64,8,32,16,16,16,1,2,2,0,0,1,1", + "v4:128,128,2,128,32,32,16,1,1,2,0,0,1,1", + "v4:32,64,4,32,16,16,4,1,1,2,0,0,1,1", + "v4:128,64,4,128,32,32,8,1,1,2,0,0,1,1", + "v4:256,128,4,128,64,32,8,1,1,2,0,0,1,1", + "v4:32,128,8,32,32,32,16,1,2,2,0,0,1,1", + "v4:64,32,8,32,16,16,8,1,2,2,0,0,1,1", "v4:32,32,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:32,64,8,16,32,16,4,1,2,2,0,0,1,1", + "v4:256,64,2,64,64,32,16,1,2,2,0,0,1,1", + "v4:32,128,4,32,128,16,8,1,1,2,0,0,1,1", + "v4:64,32,4,16,32,16,16,1,1,2,0,0,1,1", + "v4:128,32,4,32,32,16,16,1,2,2,0,0,1,1", + "v4:256,64,4,256,16,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,32,32,16,4,1,2,2,0,0,1,1", + "v4:128,128,4,64,32,16,16,1,2,2,0,0,1,1", + "v4:128,32,4,32,32,32,16,1,1,2,0,0,1,1", + "v4:128,32,4,64,32,16,8,1,2,2,0,0,1,1", + "v4:16,64,8,16,64,16,16,1,1,2,0,0,1,1", + "v4:256,128,8,64,64,32,8,1,2,2,0,0,1,1", + "v4:64,64,4,16,16,16,4,1,2,2,0,0,1,1", + "v4:256,32,4,32,16,16,8,1,2,2,0,0,1,1", + "v4:256,64,8,64,32,32,8,1,2,2,0,0,1,1", + "v4:192,32,4,48,32,16,16,1,1,2,0,0,1,1", + "v4:32,64,4,32,16,16,4,1,3,2,0,0,1,1", + "v4:256,32,8,64,32,16,8,1,2,2,0,0,1,1", + "v4:256,128,4,64,64,16,16,1,1,2,0,0,1,1", + "v4:128,32,8,64,32,32,4,1,1,2,0,0,1,1", + "v4:256,256,8,256,16,16,4,1,2,2,0,0,1,1", + "v4:48,32,8,48,16,16,8,1,2,2,0,0,1,1", + "v4:16,16,4,16,16,16,8,1,2,0,1,4,1,1", + "v4:32,80,8,16,80,16,8,1,2,2,0,0,1,1", + "v4:48,48,4,48,48,16,8,1,2,0,1,32,1,1", + "v4:192,32,4,96,16,16,16,1,1,0,4,32,1,1", + "v4:192,64,8,96,32,32,8,1,2,2,0,0,1,1", + "v4:256,256,8,64,64,32,8,1,1,2,0,0,1,1", + "v4:128,256,4,64,128,16,16,1,2,2,0,0,1,1", + "v4:192,256,2,192,64,32,4,1,1,2,0,0,1,1", + "v4:192,32,8,96,16,16,8,1,2,2,0,0,1,1", + "v4:256,32,8,256,16,16,8,1,1,2,0,0,1,1", "v4:256,32,8,64,32,32,8,1,2,2,0,0,1,1", - "v4:32,128,4,32,32,32,8,1,1,2,0,0,1,1", - "v4:128,32,8,32,32,32,8,1,2,2,0,0,1,1", - "v4:256,128,4,128,64,32,8,1,2,2,0,0,1,1", - "v4:256,64,8,64,64,32,8,1,2,2,0,0,1,1", - "v4:32,32,2,32,32,32,4,1,1,2,0,0,1,1" + "v4:256,64,4,128,32,32,16,1,1,1,2,32,1,1", + "v4:96,16,8,48,16,16,8,1,2,2,0,0,1,1", + "v4:96,256,8,96,32,32,4,1,2,2,0,0,1,1", + "v4:96,64,8,48,32,16,16,1,2,0,4,4,1,1", + "v4:32,16,8,16,16,16,16,1,2,1,4,32,1,1", + "v4:64,96,8,32,96,16,8,1,2,1,4,4,1,1", + "v4:128,64,8,32,32,16,8,1,2,1,2,16,1,1", + "v4:16,128,8,16,64,16,32,1,4,2,0,0,1,1", + "v4:16,16,8,16,16,16,8,1,1,0,4,16,1,1", + "v4:192,32,8,48,32,16,8,1,2,1,8,64,1,1", + "v4:256,128,2,64,64,32,16,1,2,0,1,8,1,1", + "v4:256,128,4,128,64,16,8,1,1,1,8,8,1,1", + "v4:256,32,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:64,192,4,32,96,32,8,1,2,0,8,32,1,1", + "v4:96,16,4,96,16,16,8,1,1,1,1,8,1,1", + "v4:96,48,4,96,48,16,8,1,1,0,0,8,1,1", + "v4:96,64,4,48,16,16,16,1,2,0,1,0,1,1" }; // END_CONV_XDL_f16_gfx950_DEFS @@ -928,64 +1341,126 @@ const StringRef PopulateParamsXDL::initParametersFp8ConvGfx900[] = { // BEGIN_GEMM_XDL_i8_gfx908_DEFS const StringRef PopulateParamsXDL::initParametersI8GemmGfx908[] = { - "v4:64,32,4,16,32,16,16,1,2,2,0,0,1,1", - "v4:64,64,4,32,32,32,16,1,2,2,0,0,1,1", - "v4:128,64,16,128,16,16,4,1,1,2,0,0,1,1", - "v4:128,128,8,128,32,32,8,1,2,2,0,0,1,1", "v4:32,32,16,16,16,16,16,1,2,2,0,0,1,1", - "v4:32,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,64,32,32,16,16,8,1,1,2,0,0,1,1", - "v4:32,32,16,16,16,16,16,1,1,2,0,0,1,1" + "v4:32,64,16,16,16,16,16,1,1,2,0,0,1,1", + "v4:64,64,8,32,32,32,16,1,1,2,0,0,1,1", + "v4:32,32,32,16,16,16,16,1,1,2,0,0,1,1", + "v4:64,128,8,64,32,32,8,1,1,2,0,0,1,1", + "v4:128,128,16,128,32,32,4,1,1,2,0,0,1,1", + "v4:128,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,64,16,16,32,16,16,1,2,2,0,0,1,1", + "v4:32,128,32,16,16,16,8,1,1,2,0,0,1,1", + "v4:32,256,32,32,32,32,1,1,2,2,0,0,1,1", + "v4:256,64,16,128,32,32,1,1,2,2,0,0,1,1", + "v4:48,64,8,48,16,16,16,1,1,2,0,0,1,1", + "v4:64,128,16,16,128,16,16,1,2,2,0,0,1,1", + "v4:128,256,4,64,64,32,16,1,2,1,0,0,1,1", + "v4:128,32,4,32,32,32,16,1,2,1,0,0,1,1", + "v4:48,256,16,48,16,16,8,1,1,0,0,4,1,1", + "v4:64,128,32,64,32,32,1,1,1,0,4,0,1,1", + "v4:80,128,16,80,16,16,8,1,2,1,8,8,1,1", + "v4:96,128,16,48,16,16,16,1,1,1,2,16,1,1" }; // END_GEMM_XDL_i8_gfx908_DEFS // BEGIN_GEMM_XDL_i8_gfx90a_DEFS const StringRef PopulateParamsXDL::initParametersI8GemmGfx90a[] = { - "v4:64,64,8,32,32,16,16,1,1,2,0,0,1,1", - "v4:32,64,8,32,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,16,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,16,16,16,16,16,1,2,2,0,0,1,1", - "v4:64,256,8,64,64,16,4,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,128,4,32,32,16,16,1,2,2,0,0,1,1", - "v4:128,64,8,32,64,32,8,1,1,2,0,0,1,1" + "v4:32,32,16,16,16,16,16,1,2,2,0,0,1,1", + "v4:16,16,16,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,128,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:32,64,32,16,16,16,8,1,1,2,0,0,1,1", + "v4:64,64,8,16,64,16,16,1,1,2,0,0,1,1", + "v4:64,128,16,64,32,16,4,1,1,2,0,0,1,1", + "v4:128,128,16,128,16,16,8,1,1,2,0,0,1,1", + "v4:128,128,16,64,64,16,4,1,1,2,0,0,1,1", + "v4:16,128,16,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,64,32,16,16,16,16,1,1,2,0,0,1,1", + "v4:96,128,16,96,32,32,4,1,2,2,0,0,1,1", + "v4:256,16,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:16,64,32,16,32,16,1,1,2,2,0,0,1,1", + "v4:32,128,32,32,32,32,1,1,2,2,0,0,1,1", + "v4:128,128,4,32,32,16,4,1,2,2,0,0,1,1", + "v4:128,32,16,32,16,16,8,1,2,2,0,0,1,1", + "v4:32,256,16,32,32,32,4,1,1,2,0,0,1,1", + "v4:128,128,16,32,64,16,8,1,2,2,0,0,1,1", + "v4:16,16,32,16,16,16,16,1,1,0,4,0,1,1", + "v4:64,64,32,32,64,32,1,1,1,2,0,0,1,1", + "v4:128,128,32,32,32,16,8,1,2,0,0,4,1,1", + "v4:128,16,32,32,16,16,8,1,1,2,0,0,1,1", + "v4:128,32,16,128,32,32,16,1,1,2,0,0,1,1", + "v4:64,128,32,16,32,16,8,1,1,0,2,32,1,1", + "v4:80,128,8,80,16,16,16,1,2,0,4,8,1,1" }; // END_GEMM_XDL_i8_gfx90a_DEFS // BEGIN_GEMM_XDL_i8_gfx942_DEFS const StringRef PopulateParamsXDL::initParametersI8GemmGfx942[] = { - "v4:64,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:64,64,16,32,32,32,8,1,1,2,0,0,1,1", - "v4:128,128,8,32,128,16,16,1,1,2,0,0,1,1", - "v4:128,16,4,32,16,16,16,1,1,2,0,0,1,1", - "v4:16,16,16,16,16,16,16,1,1,2,0,0,1,1", - "v4:64,16,32,16,16,16,16,1,1,2,0,0,1,1", - "v4:16,64,4,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,128,16,16,32,16,16,1,1,2,0,0,1,1", - "v4:32,64,8,32,16,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:256,128,8,64,128,16,8,1,1,2,0,0,1,1", - "v4:128,32,16,32,32,32,8,1,2,2,0,0,1,1", - "v4:64,16,32,16,16,16,8,1,2,2,0,0,1,1" + "v4:32,32,32,16,16,16,16,1,2,2,0,0,1,1", + "v4:16,16,32,16,16,16,16,1,1,2,0,0,1,1", + "v4:64,64,8,32,32,16,16,1,1,2,0,0,1,1", + "v4:32,64,32,16,16,16,16,1,1,2,0,0,1,1", + "v4:32,128,8,32,32,16,16,1,2,2,0,0,1,1", + "v4:64,16,8,32,16,16,16,1,2,2,0,0,1,1", + "v4:128,128,8,32,64,32,16,1,1,2,0,0,1,1", + "v4:64,64,32,16,32,16,16,1,1,2,0,0,1,1", + "v4:128,256,8,64,128,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,32,64,32,16,1,1,2,0,0,1,1", + "v4:128,128,16,128,16,16,8,1,1,2,0,0,1,1", + "v4:256,128,8,64,64,16,16,1,1,2,0,0,1,1", + "v4:128,64,16,32,32,16,16,1,1,2,0,0,1,1", + "v4:128,64,4,32,32,32,8,1,3,2,0,0,1,1", + "v4:128,256,8,16,256,16,8,1,2,2,0,0,1,1", + "v4:96,128,16,96,32,16,8,1,2,1,0,32,1,1", + "v4:256,256,16,128,32,32,8,1,2,2,0,0,1,1", + "v4:128,64,8,32,64,16,16,1,1,1,0,32,1,1", + "v4:192,128,8,96,64,16,16,1,1,0,1,8,1,1", + "v4:192,256,16,96,64,16,8,1,1,0,2,0,1,1", + "v4:192,64,8,96,32,16,16,1,1,0,2,0,1,1", + "v4:64,64,16,32,32,16,16,1,2,1,4,4,1,1" }; // END_GEMM_XDL_i8_gfx942_DEFS // BEGIN_GEMM_XDL_i8_gfx950_DEFS const StringRef PopulateParamsXDL::initParametersI8GemmGfx950[] = { - "v4:64,64,16,32,32,32,16,1,1,2,0,0,1,1", - "v4:32,64,16,32,16,16,16,1,2,2,0,0,1,1", - "v4:32,16,16,16,16,16,16,1,1,2,0,0,1,1", - "v4:64,64,32,32,32,32,16,1,2,2,0,0,1,1", - "v4:128,64,16,32,64,32,16,1,1,2,0,0,1,1", - "v4:64,64,4,32,32,32,8,1,1,2,0,0,1,1", - "v4:128,128,8,128,32,16,8,1,2,2,0,0,1,1", - "v4:32,16,16,16,16,16,8,1,1,2,0,0,1,1", - "v4:128,128,8,32,128,16,16,1,1,2,0,0,1,1", - "v4:64,16,32,16,16,16,16,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,16,1,2,2,0,0,1,1", - "v4:16,16,16,16,16,16,16,1,2,2,0,0,1,1", - "v4:32,32,32,16,16,16,16,1,1,2,0,0,1,1", - "v4:16,16,32,16,16,16,16,1,1,2,0,0,1,1" + "v4:16,16,8,16,16,16,16,1,4,2,0,0,1,1", + "v4:32,16,32,16,16,16,16,1,1,2,0,0,1,1", + "v4:16,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:16,64,4,16,16,16,16,1,4,2,0,0,1,1", + "v4:64,32,32,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,16,16,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,64,32,16,8,1,2,2,0,0,1,1", + "v4:64,64,16,32,32,16,16,1,1,2,0,0,1,1", + "v4:128,128,8,64,32,32,16,1,2,2,0,0,1,1", + "v4:64,128,8,16,64,16,16,1,2,2,0,0,1,1", + "v4:128,128,4,128,64,32,16,1,4,2,0,0,1,1", + "v4:128,16,8,64,16,16,16,1,2,2,0,0,1,1", + "v4:128,64,16,32,32,32,16,1,2,2,0,0,1,1", + "v4:256,64,8,64,64,32,16,1,1,2,0,0,1,1", + "v4:128,64,32,128,64,32,1,1,4,2,0,0,1,1", + "v4:16,32,8,16,32,16,8,1,3,2,0,0,1,1", + "v4:256,128,4,256,32,16,16,1,4,2,0,0,1,1", + "v4:32,128,8,16,64,16,16,1,1,2,0,0,1,1", + "v4:256,32,32,128,32,32,1,1,4,2,0,0,1,1", + "v4:64,64,16,32,32,16,8,1,1,2,0,0,1,1", + "v4:128,256,4,32,256,32,16,1,2,2,0,0,1,1", + "v4:128,64,16,64,32,32,16,1,2,2,0,0,1,1", + "v4:32,128,16,32,32,32,16,1,1,2,0,0,1,1", + "v4:64,64,16,32,64,16,16,1,1,2,0,0,1,1", + "v4:64,128,8,32,128,16,8,1,3,2,0,0,1,1", + "v4:256,256,8,256,32,16,16,1,2,2,0,0,1,1", + "v4:64,256,4,64,64,32,8,1,4,2,0,0,1,1", + "v4:64,128,16,64,32,32,16,1,1,2,0,0,1,1", + "v4:64,256,8,64,32,32,8,1,2,2,0,0,1,1", + "v4:64,64,32,16,32,16,16,1,2,2,0,0,1,1", + "v4:128,32,32,32,32,32,16,1,2,2,0,0,1,1", + "v4:192,64,8,96,32,32,16,1,2,2,0,0,1,1", + "v4:128,128,16,64,32,32,16,1,1,2,0,0,1,1", + "v4:256,128,32,128,32,16,1,1,1,2,0,0,1,1", + "v4:128,128,32,16,64,16,16,1,1,2,0,0,1,1", + "v4:128,256,8,128,32,16,16,1,4,2,0,0,1,1", + "v4:256,128,16,32,128,32,16,1,1,2,0,0,1,1", + "v4:128,128,16,32,64,16,16,1,2,1,1,4,1,1", + "v4:256,256,16,32,128,16,8,1,1,2,0,0,1,1" }; // END_GEMM_XDL_i8_gfx950_DEFS @@ -1010,70 +1485,144 @@ const StringRef PopulateParamsXDL::initParametersFp4GemmGfx950[] = { // BEGIN_CONV_XDL_i8_gfx908_DEFS const StringRef PopulateParamsXDL::initParametersI8ConvGfx908[] = { - "v4:32,32,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,32,32,16,16,16,16,1,1,2,0,0,1,1", - "v4:16,16,8,16,16,16,16,1,2,2,0,0,1,1", - "v4:64,32,16,16,32,16,4,1,1,2,0,0,1,1", - "v4:64,16,16,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,64,4,16,64,16,4,1,1,2,0,0,1,1", - "v4:32,256,4,32,64,16,4,1,2,2,0,0,1,1", - "v4:128,32,8,128,32,32,1,1,1,2,0,0,1,1" + "v4:32,32,32,16,16,16,8,1,1,2,0,0,1,1", + "v4:128,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,16,16,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,16,4,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,128,16,32,32,32,8,1,1,2,0,0,1,1", + "v4:128,32,8,32,32,32,16,1,1,2,0,0,1,1", + "v4:64,256,4,64,64,32,4,1,1,2,0,0,1,1", + "v4:128,64,8,64,32,32,8,1,1,2,0,0,1,1", + "v4:64,64,32,16,16,16,16,1,1,2,0,0,1,1", + "v4:128,128,8,128,32,32,8,1,1,2,0,0,1,1", + "v4:128,64,16,64,32,32,4,1,1,2,0,0,1,1", + "v4:128,32,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:256,128,8,256,32,32,4,1,1,2,0,0,1,1", + "v4:64,64,4,16,16,16,4,1,2,2,0,0,1,1", + "v4:256,128,8,64,32,32,16,1,1,2,0,0,1,1", + "v4:192,16,16,48,16,16,8,1,2,2,0,0,1,1", + "v4:256,128,16,64,32,32,4,1,1,2,0,0,1,1", + "v4:256,64,8,32,32,32,16,1,1,2,0,0,1,1", + "v4:128,256,32,64,64,16,1,1,2,2,0,0,1,1" }; // END_CONV_XDL_i8_gfx908_DEFS // BEGIN_CONV_XDL_i8_gfx90a_DEFS const StringRef PopulateParamsXDL::initParametersI8ConvGfx90a[] = { - "v4:32,32,16,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,32,8,16,16,16,16,1,2,2,0,0,1,1", - "v4:32,16,16,16,16,16,16,1,2,2,0,0,1,1", - "v4:64,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,64,8,32,32,32,1,1,1,2,0,0,1,1", - "v4:64,32,32,32,16,16,8,1,2,2,0,0,1,1", - "v4:16,256,4,16,64,16,4,1,1,2,0,0,1,1", - "v4:32,64,16,32,16,16,1,1,1,2,0,0,1,1" + "v4:32,32,32,16,16,16,8,1,1,2,0,0,1,1", + "v4:128,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:32,64,32,16,16,16,8,1,2,2,0,0,1,1", + "v4:128,16,16,16,16,16,16,1,1,2,0,0,1,1", + "v4:64,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,64,8,32,32,16,16,1,1,2,0,0,1,1", + "v4:128,32,8,32,32,32,16,1,1,2,0,0,1,1", + "v4:64,256,4,64,64,32,4,1,1,2,0,0,1,1", + "v4:64,16,32,16,16,16,16,1,1,2,0,0,1,1", + "v4:128,128,32,64,32,16,4,1,1,2,0,0,1,1", + "v4:128,128,8,128,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:256,64,8,128,16,16,8,1,1,2,0,0,1,1", + "v4:32,128,32,16,32,16,4,1,1,2,0,0,1,1", + "v4:128,32,16,32,32,32,4,1,2,2,0,0,1,1", + "v4:128,64,4,32,32,16,4,1,1,2,0,0,1,1", + "v4:128,64,4,32,64,32,16,1,1,2,0,0,1,1", + "v4:128,128,4,64,16,16,8,1,2,2,0,0,1,1", + "v4:256,128,8,64,32,32,8,1,2,2,0,0,1,1", + "v4:128,128,8,64,16,16,16,1,2,2,0,0,1,1", + "v4:256,128,8,64,64,32,16,1,1,2,0,0,1,1", + "v4:128,64,4,64,64,32,16,1,1,0,2,4,1,1", + "v4:256,128,4,256,32,32,16,1,1,2,0,0,1,1", + "v4:256,128,8,64,32,32,16,1,1,1,8,4,1,1", + "v4:32,128,8,32,128,32,4,1,2,2,0,0,1,1", + "v4:64,128,4,64,128,32,4,1,1,0,0,8,1,1", + "v4:64,16,16,16,16,16,4,1,2,0,1,8,1,1", + "v4:64,64,32,32,64,16,4,1,2,2,0,0,1,1" }; // END_CONV_XDL_i8_gfx90a_DEFS // BEGIN_CONV_XDL_i8_gfx942_DEFS const StringRef PopulateParamsXDL::initParametersI8ConvGfx942[] = { - "v4:16,16,16,16,16,16,16,1,1,2,0,0,1,1", - "v4:16,32,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,32,16,16,16,16,16,1,2,2,0,0,1,1", - "v4:32,32,32,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,16,16,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,16,32,16,16,16,1,1,2,2,0,0,1,1", - "v4:64,16,32,16,16,16,16,1,2,2,0,0,1,1", - "v4:16,32,32,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,64,16,64,32,32,1,1,2,2,0,0,1,1", - "v4:128,128,16,128,32,32,1,1,2,2,0,0,1,1", - "v4:32,64,32,32,16,16,1,1,2,2,0,0,1,1" + "v4:32,16,16,16,16,16,8,1,2,2,0,0,1,1", + "v4:32,16,16,32,16,16,16,1,2,2,0,0,1,1", + "v4:64,64,8,64,32,32,8,1,1,2,0,0,1,1", + "v4:128,128,8,64,64,16,8,1,1,2,0,0,1,1", + "v4:64,16,32,64,16,16,1,1,4,2,0,0,1,1", + "v4:128,64,8,32,64,32,16,1,1,2,0,0,1,1", + "v4:64,64,8,32,32,16,8,1,2,2,0,0,1,1", + "v4:32,64,16,32,32,32,1,1,1,2,0,0,1,1", + "v4:128,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:128,32,8,32,32,16,16,1,1,2,0,0,1,1", + "v4:256,32,4,128,32,32,16,1,2,2,0,0,1,1", + "v4:256,64,4,256,32,16,16,1,1,2,0,0,1,1", + "v4:128,128,32,64,64,32,1,1,4,2,0,0,1,1", + "v4:128,64,8,32,64,32,8,1,2,2,0,0,1,1", + "v4:256,64,4,64,64,32,16,1,2,2,0,0,1,1", + "v4:64,128,8,16,64,16,16,1,2,2,0,0,1,1", + "v4:128,256,4,64,256,16,8,1,1,2,0,0,1,1", + "v4:128,32,8,64,32,16,8,1,3,2,0,0,1,1", + "v4:128,128,8,64,32,16,16,1,2,2,0,0,1,1", + "v4:256,128,4,128,32,32,16,1,1,2,0,0,1,1", + "v4:256,64,32,256,32,32,1,1,2,2,0,0,1,1", + "v4:128,256,4,128,128,16,8,1,1,2,0,0,1,1", + "v4:256,32,16,64,32,32,1,1,2,2,0,0,1,1", + "v4:128,64,4,32,32,16,16,1,1,2,0,0,1,1", + "v4:256,256,4,256,16,16,16,1,4,2,0,0,1,1", + "v4:256,64,32,64,32,32,1,1,2,2,0,0,1,1", + "v4:256,128,32,256,32,16,1,1,3,2,0,0,1,1", + "v4:64,192,4,32,96,16,8,1,2,2,0,0,1,1", + "v4:128,32,16,32,16,16,16,1,2,2,0,0,1,1", + "v4:256,16,8,16,16,16,8,1,2,0,0,64,1,1", + "v4:256,256,8,32,128,32,16,1,4,2,0,0,1,1", + "v4:256,80,8,16,80,16,8,1,2,0,2,64,1,1", + "v4:64,16,32,64,16,16,8,1,3,2,0,0,1,1" }; // END_CONV_XDL_i8_gfx942_DEFS // BEGIN_CONV_XDL_i8_gfx950_DEFS const StringRef PopulateParamsXDL::initParametersI8ConvGfx950[] = { - "v4:64,32,8,16,32,16,8,1,1,2,0,0,1,1", - "v4:32,32,16,32,32,32,1,1,1,2,0,0,1,1", - "v4:64,128,16,64,32,32,1,1,2,2,0,0,1,1", - "v4:128,16,8,64,16,16,16,1,2,2,0,0,1,1", - "v4:256,32,8,64,32,16,16,1,2,2,0,0,1,1", - "v4:64,64,32,32,32,16,1,1,1,2,0,0,1,1", - "v4:64,16,16,16,16,16,16,1,2,2,0,0,1,1", - "v4:32,16,32,16,16,16,16,1,2,2,0,0,1,1", - "v4:128,128,4,64,64,32,16,1,1,2,0,0,1,1", - "v4:256,64,4,128,32,32,16,1,2,2,0,0,1,1", + "v4:32,16,8,32,16,16,16,1,4,2,0,0,1,1", "v4:64,16,8,32,16,16,16,1,2,2,0,0,1,1", - "v4:64,256,4,64,64,32,8,1,2,2,0,0,1,1", - "v4:64,64,4,64,16,16,16,1,1,2,0,0,1,1", - "v4:64,32,4,32,16,16,16,1,2,2,0,0,1,1", + "v4:128,32,4,64,16,16,16,1,4,2,0,0,1,1", + "v4:128,128,8,128,16,16,8,1,4,2,0,0,1,1", + "v4:32,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:32,64,16,32,32,32,8,1,3,2,0,0,1,1", + "v4:64,64,4,32,64,16,16,1,1,2,0,0,1,1", + "v4:128,32,4,128,32,16,16,1,2,2,0,0,1,1", + "v4:32,32,16,32,32,16,8,1,2,2,0,0,1,1", + "v4:64,128,32,32,32,32,1,1,4,2,0,0,1,1", + "v4:128,16,8,64,16,16,16,1,2,2,0,0,1,1", + "v4:64,32,4,64,32,16,8,1,2,2,0,0,1,1", + "v4:32,64,32,32,32,32,1,1,1,2,0,0,1,1", + "v4:128,32,4,64,32,16,8,1,1,2,0,0,1,1", + "v4:32,64,32,32,32,32,1,1,2,2,0,0,1,1", + "v4:64,256,4,64,64,16,8,1,2,2,0,0,1,1", + "v4:64,64,32,32,64,32,1,1,3,2,0,0,1,1", "v4:64,64,16,64,32,32,1,1,2,2,0,0,1,1", - "v4:16,32,32,16,16,16,16,1,2,2,0,0,1,1", - "v4:32,16,16,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,16,16,16,16,16,8,1,2,2,0,0,1,1", - "v4:256,128,8,64,128,32,16,1,1,2,0,0,1,1", - "v4:64,256,4,64,64,32,8,1,1,2,0,0,1,1", - "v4:32,32,8,16,16,16,16,1,1,2,0,0,1,1" + "v4:32,256,4,32,64,32,8,1,1,2,0,0,1,1", + "v4:128,128,4,128,32,16,16,1,1,2,0,0,1,1", + "v4:256,64,4,64,64,16,16,1,2,2,0,0,1,1", + "v4:128,256,8,128,64,32,8,1,2,2,0,0,1,1", + "v4:64,64,8,64,64,32,8,1,2,2,0,0,1,1", + "v4:32,128,16,32,64,32,8,1,4,2,0,0,1,1", + "v4:32,64,8,32,64,32,16,1,1,2,0,0,1,1", + "v4:128,128,8,32,64,16,16,1,2,2,0,0,1,1", + "v4:256,32,8,64,32,32,16,1,2,2,0,0,1,1", + "v4:256,64,8,64,32,32,16,1,2,2,0,0,1,1", + "v4:128,128,16,64,16,16,16,1,1,2,0,0,1,1", + "v4:128,32,32,32,32,16,16,1,1,2,0,0,1,1", + "v4:256,128,8,64,64,32,16,1,1,2,0,0,1,1", + "v4:32,128,4,16,128,16,8,1,3,2,0,0,1,1", + "v4:64,64,16,32,64,32,16,1,1,2,0,0,1,1", + "v4:128,256,4,32,128,32,16,1,2,2,0,0,1,1", + "v4:256,32,8,128,16,16,16,1,1,2,0,0,1,1", + "v4:256,64,4,64,32,32,16,1,2,2,0,0,1,1", + "v4:128,64,8,128,64,32,16,1,2,2,0,0,1,1", + "v4:256,16,4,16,16,16,16,1,2,0,1,4,1,1", + "v4:256,32,8,32,16,16,16,1,2,0,1,64,1,1", + "v4:256,80,8,16,80,16,8,1,2,0,4,32,1,1", + "v4:32,64,16,16,16,16,16,1,4,2,0,0,1,1", + "v4:32,64,16,16,32,16,16,1,4,2,0,0,1,1", + "v4:64,16,32,64,16,16,16,1,2,2,0,0,1,1" }; // END_CONV_XDL_i8_gfx950_DEFS @@ -1082,82 +1631,82 @@ const StringRef PopulateParamsXDL::initParametersI8ConvGfx950[] = { #ifdef XDL_DECLARATIONS_GEN // BEGIN_GEMM_XDL_f32_gfx908_DECS -static constexpr size_t nInitParametersF32GemmGfx908 = 8; +static constexpr size_t nInitParametersF32GemmGfx908 = 15; static const StringRef initParametersF32GemmGfx908[nInitParametersF32GemmGfx908]; // END_GEMM_XDL_f32_gfx908_DECS // BEGIN_GEMM_XDL_f32_gfx90a_DECS -static constexpr size_t nInitParametersF32GemmGfx90a = 7; +static constexpr size_t nInitParametersF32GemmGfx90a = 13; static const StringRef initParametersF32GemmGfx90a[nInitParametersF32GemmGfx90a]; // END_GEMM_XDL_f32_gfx90a_DECS // BEGIN_GEMM_XDL_f32_gfx942_DECS -static constexpr size_t nInitParametersF32GemmGfx942 = 9; +static constexpr size_t nInitParametersF32GemmGfx942 = 17; static const StringRef initParametersF32GemmGfx942[nInitParametersF32GemmGfx942]; // END_GEMM_XDL_f32_gfx942_DECS // BEGIN_GEMM_XDL_f32_gfx950_DECS -static constexpr size_t nInitParametersF32GemmGfx950 = 6; +static constexpr size_t nInitParametersF32GemmGfx950 = 10; static const StringRef initParametersF32GemmGfx950[nInitParametersF32GemmGfx950]; // END_GEMM_XDL_f32_gfx950_DECS // BEGIN_CONV_XDL_f32_gfx908_DECS -static constexpr size_t nInitParametersF32ConvGfx908 = 18; +static constexpr size_t nInitParametersF32ConvGfx908 = 39; static const StringRef initParametersF32ConvGfx908[nInitParametersF32ConvGfx908]; // END_CONV_XDL_f32_gfx908_DECS // BEGIN_CONV_XDL_f32_gfx90a_DECS -static constexpr size_t nInitParametersF32ConvGfx90a = 18; +static constexpr size_t nInitParametersF32ConvGfx90a = 47; static const StringRef initParametersF32ConvGfx90a[nInitParametersF32ConvGfx90a]; // END_CONV_XDL_f32_gfx90a_DECS // BEGIN_CONV_XDL_f32_gfx942_DECS -static constexpr size_t nInitParametersF32ConvGfx942 = 23; +static constexpr size_t nInitParametersF32ConvGfx942 = 60; static const StringRef initParametersF32ConvGfx942[nInitParametersF32ConvGfx942]; // END_CONV_XDL_f32_gfx942_DECS // BEGIN_CONV_XDL_f32_gfx950_DECS -static constexpr size_t nInitParametersF32ConvGfx950 = 33; +static constexpr size_t nInitParametersF32ConvGfx950 = 60; static const StringRef initParametersF32ConvGfx950[nInitParametersF32ConvGfx950]; // END_CONV_XDL_f32_gfx950_DECS // BEGIN_GEMM_XDL_f16_gfx908_DECS -static constexpr size_t nInitParametersF16GemmGfx908 = 17; +static constexpr size_t nInitParametersF16GemmGfx908 = 21; static const StringRef initParametersF16GemmGfx908[nInitParametersF16GemmGfx908]; // END_GEMM_XDL_f16_gfx908_DECS // BEGIN_GEMM_XDL_f16_gfx90a_DECS -static constexpr size_t nInitParametersF16GemmGfx90a = 17; +static constexpr size_t nInitParametersF16GemmGfx90a = 24; static const StringRef initParametersF16GemmGfx90a[nInitParametersF16GemmGfx90a]; // END_GEMM_XDL_f16_gfx90a_DECS // BEGIN_GEMM_XDL_f16_gfx942_DECS -static constexpr size_t nInitParametersF16GemmGfx942 = 21; +static constexpr size_t nInitParametersF16GemmGfx942 = 27; static const StringRef initParametersF16GemmGfx942[nInitParametersF16GemmGfx942]; // END_GEMM_XDL_f16_gfx942_DECS // BEGIN_GEMM_XDL_f16_gfx950_DECS -static constexpr size_t nInitParametersF16GemmGfx950 = 14; +static constexpr size_t nInitParametersF16GemmGfx950 = 28; static const StringRef initParametersF16GemmGfx950[nInitParametersF16GemmGfx950]; // END_GEMM_XDL_f16_gfx950_DECS // BEGIN_CONV_XDL_f16_gfx908_DECS -static constexpr size_t nInitParametersF16ConvGfx908 = 14; +static constexpr size_t nInitParametersF16ConvGfx908 = 58; static const StringRef initParametersF16ConvGfx908[nInitParametersF16ConvGfx908]; // END_CONV_XDL_f16_gfx908_DECS // BEGIN_CONV_XDL_f16_gfx90a_DECS -static constexpr size_t nInitParametersF16ConvGfx90a = 16; +static constexpr size_t nInitParametersF16ConvGfx90a = 71; static const StringRef initParametersF16ConvGfx90a[nInitParametersF16ConvGfx90a]; // END_CONV_XDL_f16_gfx90a_DECS // BEGIN_CONV_XDL_f16_gfx942_DECS -static constexpr size_t nInitParametersF16ConvGfx942 = 20; +static constexpr size_t nInitParametersF16ConvGfx942 = 87; static const StringRef initParametersF16ConvGfx942[nInitParametersF16ConvGfx942]; // END_CONV_XDL_f16_gfx942_DECS // BEGIN_CONV_XDL_f16_gfx950_DECS -static constexpr size_t nInitParametersF16ConvGfx950 = 26; +static constexpr size_t nInitParametersF16ConvGfx950 = 81; static const StringRef initParametersF16ConvGfx950[nInitParametersF16ConvGfx950]; // END_CONV_XDL_f16_gfx950_DECS @@ -1172,22 +1721,22 @@ static const StringRef initParametersFp8ConvGfx900[nInitParametersFp8ConvGfx900] // END_CONV_XDL_fp8_gfx900_DECS // BEGIN_GEMM_XDL_i8_gfx908_DECS -static constexpr size_t nInitParametersI8GemmGfx908 = 8; +static constexpr size_t nInitParametersI8GemmGfx908 = 19; static const StringRef initParametersI8GemmGfx908[nInitParametersI8GemmGfx908]; // END_GEMM_XDL_i8_gfx908_DECS // BEGIN_GEMM_XDL_i8_gfx90a_DECS -static constexpr size_t nInitParametersI8GemmGfx90a = 8; +static constexpr size_t nInitParametersI8GemmGfx90a = 25; static const StringRef initParametersI8GemmGfx90a[nInitParametersI8GemmGfx90a]; // END_GEMM_XDL_i8_gfx90a_DECS // BEGIN_GEMM_XDL_i8_gfx942_DECS -static constexpr size_t nInitParametersI8GemmGfx942 = 13; +static constexpr size_t nInitParametersI8GemmGfx942 = 22; static const StringRef initParametersI8GemmGfx942[nInitParametersI8GemmGfx942]; // END_GEMM_XDL_i8_gfx942_DECS // BEGIN_GEMM_XDL_i8_gfx950_DECS -static constexpr size_t nInitParametersI8GemmGfx950 = 14; +static constexpr size_t nInitParametersI8GemmGfx950 = 39; static const StringRef initParametersI8GemmGfx950[nInitParametersI8GemmGfx950]; // END_GEMM_XDL_i8_gfx950_DECS @@ -1197,22 +1746,22 @@ static const StringRef initParametersFp4GemmGfx950[nInitParametersFp4GemmGfx950] // END_GEMM_XDL_fp4_gfx950_DECS // BEGIN_CONV_XDL_i8_gfx908_DECS -static constexpr size_t nInitParametersI8ConvGfx908 = 8; +static constexpr size_t nInitParametersI8ConvGfx908 = 19; static const StringRef initParametersI8ConvGfx908[nInitParametersI8ConvGfx908]; // END_CONV_XDL_i8_gfx908_DECS // BEGIN_CONV_XDL_i8_gfx90a_DECS -static constexpr size_t nInitParametersI8ConvGfx90a = 9; +static constexpr size_t nInitParametersI8ConvGfx90a = 28; static const StringRef initParametersI8ConvGfx90a[nInitParametersI8ConvGfx90a]; // END_CONV_XDL_i8_gfx90a_DECS // BEGIN_CONV_XDL_i8_gfx942_DECS -static constexpr size_t nInitParametersI8ConvGfx942 = 11; +static constexpr size_t nInitParametersI8ConvGfx942 = 33; static const StringRef initParametersI8ConvGfx942[nInitParametersI8ConvGfx942]; // END_CONV_XDL_i8_gfx942_DECS // BEGIN_CONV_XDL_i8_gfx950_DECS -static constexpr size_t nInitParametersI8ConvGfx950 = 21; +static constexpr size_t nInitParametersI8ConvGfx950 = 43; static const StringRef initParametersI8ConvGfx950[nInitParametersI8ConvGfx950]; // END_CONV_XDL_i8_gfx950_DECS @@ -1242,32 +1791,6 @@ const StringRef PopulateParamsWmma::initParametersF16GemmGfx1000[] = { }; // END_GEMM_Wmma_f16_gfx1000_DEFS -// BEGIN_GEMM_Wmma_f16_gfx1100_DEFS -const StringRef PopulateParamsWmma::initParametersF16GemmGfx1100[] = { - "v4:128,64,4,128,16,16,8,1,1,2,0,0,1,1", - "v4:128,128,8,64,32,16,8,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,32,8,32,16,16,8,1,1,2,0,0,1,1", - "v4:256,128,4,128,32,16,8,1,1,2,0,0,1,1", - "v4:128,128,2,128,32,16,8,1,2,2,0,0,1,1", - "v4:128,256,8,128,32,16,8,1,1,2,0,0,1,1", - "v4:32,256,8,32,32,16,8,1,1,2,0,0,1,1", - "v4:128,32,2,32,32,16,8,1,2,2,0,0,1,1", - "v4:16,16,4,16,16,16,4,1,1,2,0,0,1,1", - "v4:32,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:128,64,2,32,64,16,8,1,2,2,0,0,1,1", - "v4:32,32,8,32,32,16,16,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:16,32,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,64,4,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,32,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:32,16,8,32,16,16,16,1,1,2,0,0,1,1" -}; -// END_GEMM_Wmma_f16_gfx1100_DEFS - // BEGIN_CONV_Wmma_f16_gfx1000_DEFS const StringRef PopulateParamsWmma::initParametersF16ConvGfx1000[] = { "v4:128,64,8,32,64,16,8,1,1,2,0,0,1,1", @@ -1299,38 +1822,6 @@ const StringRef PopulateParamsWmma::initParametersF16ConvGfx1000[] = { }; // END_CONV_Wmma_f16_gfx1000_DEFS -// BEGIN_CONV_Wmma_f16_gfx1100_DEFS -const StringRef PopulateParamsWmma::initParametersF16ConvGfx1100[] = { - "v4:64,128,8,32,32,16,8,1,1,2,0,0,1,1", - "v4:16,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:32,64,4,32,32,16,8,1,1,2,0,0,1,1", - "v4:256,64,8,32,32,16,8,1,1,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:64,128,4,64,32,16,8,1,1,2,0,0,1,1", - "v4:32,32,8,32,16,16,8,1,2,2,0,0,1,1", - "v4:32,16,4,16,16,16,16,1,1,2,0,0,1,1", - "v4:64,32,4,64,16,16,8,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,32,4,32,16,16,8,1,1,2,0,0,1,1", - "v4:128,64,2,32,64,16,8,1,2,2,0,0,1,1", - "v4:16,32,4,16,32,16,16,1,1,2,0,0,1,1", - "v4:64,64,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:128,256,2,32,64,16,8,1,1,2,0,0,1,1", - "v4:128,256,2,128,64,16,8,1,1,2,0,0,1,1", - "v4:128,128,8,16,64,16,8,1,1,2,0,0,1,1", - "v4:16,256,4,16,32,16,8,1,1,2,0,0,1,1", - "v4:128,128,2,64,32,16,16,1,1,2,0,0,1,1", - "v4:256,64,2,64,32,16,8,1,2,2,0,0,1,1", - "v4:32,128,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,16,4,16,16,16,4,1,1,2,0,0,1,1", - "v4:64,256,2,32,64,16,8,1,2,2,0,0,1,1", - "v4:32,128,4,32,32,16,4,1,1,2,0,0,1,1", - "v4:32,256,2,32,64,16,8,1,2,2,0,0,1,1", - "v4:64,256,2,32,64,16,8,1,1,2,0,0,1,1" -}; -// END_CONV_Wmma_f16_gfx1100_DEFS - // BEGIN_GEMM_Wmma_fp8_gfx1000_DEFS const StringRef PopulateParamsWmma::initParametersFp8GemmGfx1000[] = { "v4:128,128,4,32,64,16,16,1,1,2,0,0,1,1", @@ -1400,20 +1891,6 @@ const StringRef PopulateParamsWmma::initParametersI8GemmGfx1000[] = { }; // END_GEMM_Wmma_i8_gfx1000_DEFS -// BEGIN_GEMM_Wmma_i8_gfx1100_DEFS -const StringRef PopulateParamsWmma::initParametersI8GemmGfx1100[] = { - "v4:64,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:64,32,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:128,32,4,64,16,16,16,1,1,2,0,0,1,1", - "v4:256,64,4,128,32,16,16,1,1,2,0,0,1,1", - "v4:128,32,2,32,32,16,16,1,1,2,0,0,1,1", - "v4:64,16,4,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,16,4,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,16,4,16,16,16,4,1,1,2,0,0,1,1", - "v4:128,64,8,16,16,16,4,1,2,2,0,0,1,1" -}; -// END_GEMM_Wmma_i8_gfx1100_DEFS - // BEGIN_CONV_Wmma_i8_gfx1000_DEFS const StringRef PopulateParamsWmma::initParametersI8ConvGfx1000[] = { "v4:128,64,8,32,64,16,16,1,1,2,0,0,1,1", @@ -1430,116 +1907,350 @@ const StringRef PopulateParamsWmma::initParametersI8ConvGfx1000[] = { }; // END_CONV_Wmma_i8_gfx1000_DEFS -// BEGIN_CONV_Wmma_i8_gfx1100_DEFS -const StringRef PopulateParamsWmma::initParametersI8ConvGfx1100[] = { - "v4:64,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:16,64,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:128,16,8,32,16,16,16,1,1,2,0,0,1,1", - "v4:256,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:128,64,8,32,16,16,8,1,2,2,0,0,1,1", - "v4:64,128,8,16,32,16,16,1,2,2,0,0,1,1", - "v4:256,128,4,128,16,16,8,1,2,2,0,0,1,1", - "v4:256,64,8,16,64,16,8,1,1,2,0,0,1,1", - "v4:16,16,4,16,16,16,4,1,1,2,0,0,1,1", - "v4:32,128,2,32,32,16,8,1,1,2,0,0,1,1", - "v4:32,32,2,32,32,16,8,1,1,2,0,0,1,1", - "v4:128,128,4,128,128,16,4,1,2,2,0,0,1,1", - "v4:128,128,4,128,64,16,4,1,2,2,0,0,1,1" -}; -// END_CONV_Wmma_i8_gfx1100_DEFS - // BEGIN_GEMM_Wmma_f16_gfx1201_DEFS const StringRef PopulateParamsWmma::initParametersF16GemmGfx1201[] = { - "v4:128,64,4,64,32,16,8,1,1,2,0,0,1,1", - "v4:128,128,8,32,128,16,8,1,1,2,0,0,1,1", - "v4:32,64,8,16,32,16,8,1,2,2,0,0,1,1", - "v4:64,128,2,32,64,16,8,1,2,2,0,0,1,1", - "v4:16,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:128,256,4,64,64,16,8,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,16,1,2,2,0,0,1,1", - "v4:128,128,4,128,32,16,8,1,1,2,0,0,1,1", - "v4:64,64,8,32,32,16,8,1,1,2,0,0,1,1", - "v4:32,64,8,32,16,16,8,1,1,2,0,0,1,1", - "v4:32,128,4,32,32,16,8,1,2,2,0,0,1,1", - "v4:256,64,4,32,64,16,8,1,1,2,0,0,1,1", + "v4:128,128,2,32,128,16,8,1,1,2,0,0,1,1", + "v4:128,128,4,32,64,16,8,1,1,2,0,0,1,1", + "v4:256,128,4,32,128,16,8,1,2,2,0,0,1,1", + "v4:128,128,4,128,32,16,8,1,2,2,0,0,1,1", + "v4:128,128,4,32,128,16,8,1,2,2,0,0,1,1", + "v4:64,64,4,32,64,16,8,1,1,2,0,0,1,1", + "v4:16,32,8,16,32,16,16,1,2,2,0,0,1,1", "v4:256,128,4,128,32,16,8,1,2,2,0,0,1,1", - "v4:64,64,8,64,32,16,8,1,1,2,0,0,1,1" + "v4:64,64,8,32,32,16,8,1,1,2,0,0,1,1", + "v4:128,128,4,64,64,16,4,1,1,2,0,0,1,1", + "v4:16,32,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:16,128,8,16,32,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,64,32,16,8,1,2,2,0,0,1,1", + "v4:96,32,8,48,32,16,8,1,1,2,0,0,1,1", + "v4:128,128,4,64,64,16,8,1,1,0,8,0,1,1", + "v4:80,64,8,80,16,16,8,1,1,2,0,0,1,1", + "v4:128,32,8,64,16,16,8,1,1,2,0,0,1,1", + "v4:16,64,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:64,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,16,8,64,16,16,16,1,2,2,0,0,1,1", + "v4:128,64,8,32,64,16,16,1,1,2,0,0,1,1", + "v4:16,64,4,16,64,16,16,1,1,2,0,0,1,1", + "v4:256,256,4,64,64,16,8,1,2,2,0,0,1,1", + "v4:128,128,2,128,32,16,16,1,2,2,0,0,1,1", + "v4:16,64,4,16,64,16,8,1,2,2,0,0,1,1", + "v4:16,32,4,16,32,16,16,1,2,2,0,0,1,1", + "v4:64,32,2,64,32,16,16,1,2,2,0,0,1,1", + "v4:64,64,4,64,64,16,16,1,1,2,0,0,1,1", + "v4:96,192,4,96,48,16,4,1,2,2,0,0,1,1", + "v4:128,128,2,64,128,16,8,1,1,2,0,0,1,1", + "v4:128,16,4,128,16,16,16,1,2,2,0,0,1,1", + "v4:128,256,4,64,128,16,4,1,2,2,0,0,1,1", + "v4:16,64,8,16,64,16,4,1,2,2,0,0,1,1", + "v4:192,64,4,48,32,16,8,1,2,2,0,0,1,1", + "v4:256,128,4,128,64,16,8,1,2,2,0,0,1,1", + "v4:256,256,4,256,32,16,4,1,2,2,0,0,1,1", + "v4:128,128,8,128,16,16,8,1,2,2,0,0,1,1", + "v4:128,160,8,32,160,16,4,1,1,2,0,0,1,1", + "v4:128,64,4,128,32,16,8,1,2,2,0,0,1,1", + "v4:16,128,4,16,128,16,4,1,1,2,0,0,1,1", + "v4:16,64,8,16,64,16,16,1,2,2,0,0,1,1", + "v4:256,128,2,128,64,16,16,1,2,2,0,0,1,1", + "v4:256,128,8,128,64,16,8,1,1,2,0,0,1,1", + "v4:256,256,8,128,32,16,4,1,2,2,0,0,1,1", + "v4:64,128,8,32,128,16,8,1,1,2,0,0,1,1", + "v4:64,256,2,64,128,16,16,1,1,2,0,0,1,1", + "v4:128,128,8,32,128,16,8,1,2,2,0,0,1,1", + "v4:128,32,2,64,32,16,16,1,2,2,0,0,1,1", + "v4:128,32,8,128,16,16,16,1,1,2,0,0,1,1", + "v4:16,96,4,16,96,16,16,1,2,2,0,0,1,1", + "v4:48,48,4,48,48,16,8,1,2,2,0,0,1,1", + "v4:64,16,8,64,16,16,4,1,2,2,0,0,1,1", + "v4:64,256,4,32,256,16,4,1,1,2,0,0,1,1", + "v4:64,256,8,64,64,16,8,1,1,2,0,0,1,1", + "v4:128,64,2,128,64,16,16,1,2,2,0,0,1,1", + "v4:32,64,8,16,32,16,8,1,2,1,0,16,1,1", + "v4:128,128,4,128,64,16,16,1,1,2,0,0,1,1", + "v4:128,128,8,128,64,16,16,1,2,2,0,0,1,1", + "v4:128,16,8,128,16,16,16,1,1,2,0,0,1,1", + "v4:192,96,4,48,96,16,16,1,1,2,0,0,1,1", + "v4:32,256,2,32,256,16,16,1,1,2,0,0,1,1", + "v4:64,128,8,32,128,16,16,1,1,2,0,0,1,1", + "v4:96,64,8,96,16,16,8,1,1,2,0,0,1,1", + "v4:240,32,8,240,16,16,8,1,2,2,0,0,1,1", + "v4:256,32,8,256,32,16,4,1,1,2,0,0,1,1" }; // END_GEMM_Wmma_f16_gfx1201_DEFS // BEGIN_GEMM_Wmma_i8_gfx1201_DEFS const StringRef PopulateParamsWmma::initParametersI8GemmGfx1201[] = { - "v4:128,64,4,64,32,16,16,1,1,2,0,0,1,1", - "v4:32,32,8,16,16,16,16,1,2,2,0,0,1,1", - "v4:128,128,4,64,64,16,16,1,1,2,0,0,1,1", - "v4:256,64,4,32,64,16,16,1,1,2,0,0,1,1", - "v4:128,32,4,32,32,16,16,1,1,2,0,0,1,1", - "v4:128,256,8,64,64,16,8,1,1,2,0,0,1,1", - "v4:64,32,8,32,16,16,16,1,1,2,0,0,1,1", + "v4:64,32,8,16,32,16,16,1,2,2,0,0,1,1", "v4:256,128,4,128,32,16,16,1,1,2,0,0,1,1", - "v4:16,128,8,16,32,16,16,1,1,2,0,0,1,1", + "v4:256,128,4,32,128,16,16,1,2,2,0,0,1,1", + "v4:192,64,4,96,32,16,16,1,1,2,0,0,1,1", "v4:256,128,8,64,64,16,8,1,2,2,0,0,1,1", - "v4:128,256,2,128,64,16,8,1,1,2,0,0,1,1", - "v4:256,256,8,64,32,16,8,1,2,2,0,0,1,1", - "v4:64,64,8,32,32,16,16,1,1,2,0,0,1,1" + "v4:16,32,8,16,32,16,16,1,1,2,0,0,1,1", + "v4:128,64,4,128,16,16,16,1,2,2,0,0,1,1", + "v4:128,128,8,128,32,16,8,1,2,2,0,0,1,1", + "v4:256,128,4,128,32,16,16,1,2,2,0,0,1,1", + "v4:256,128,4,64,64,16,16,1,2,2,0,0,1,1", + "v4:256,64,8,128,16,16,16,1,1,2,0,0,1,1", + "v4:128,128,4,32,64,16,16,1,1,2,0,0,1,1", + "v4:160,128,4,160,16,16,16,1,2,2,0,0,1,1", + "v4:256,128,4,64,32,16,16,1,2,2,0,0,1,1", + "v4:64,256,4,64,32,16,16,1,2,2,0,0,1,1", + "v4:128,64,4,64,64,16,16,1,1,2,0,0,1,1", + "v4:64,64,4,64,64,16,16,1,1,2,0,0,1,1", + "v4:128,128,8,16,128,16,16,1,2,2,0,0,1,1", + "v4:128,128,8,32,64,16,16,1,2,2,0,0,1,1", + "v4:128,256,4,64,64,16,16,1,2,2,0,0,1,1", + "v4:256,32,4,256,32,16,16,1,1,2,0,0,1,1", + "v4:64,64,8,32,64,16,16,1,2,2,0,0,1,1", + "v4:128,128,8,32,128,16,16,1,2,2,0,0,1,1", + "v4:128,16,8,128,16,16,16,1,2,2,0,0,1,1", + "v4:16,128,8,16,128,16,16,1,2,2,0,0,1,1", + "v4:160,256,8,160,32,16,8,1,1,2,0,0,1,1", + "v4:256,64,4,256,16,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,64,128,16,16,1,1,2,0,0,1,1", + "v4:64,64,8,32,64,16,16,1,1,2,0,0,1,1", + "v4:128,16,4,64,16,16,16,1,2,2,0,0,1,1", + "v4:128,256,8,128,64,16,4,1,1,2,0,0,1,1", + "v4:128,256,8,32,256,16,4,1,1,2,0,0,1,1", + "v4:128,32,2,128,32,16,16,1,2,2,0,0,1,1", + "v4:128,32,4,128,32,16,8,1,2,2,0,0,1,1", + "v4:16,112,8,16,112,16,16,1,2,2,0,0,1,1", + "v4:192,192,8,48,192,16,16,1,1,2,0,0,1,1", + "v4:224,64,4,112,32,16,16,1,2,2,0,0,1,1", + "v4:256,128,2,64,128,16,16,1,2,2,0,0,1,1", + "v4:256,128,4,64,128,16,16,1,2,2,0,0,1,1", + "v4:256,128,8,256,16,16,8,1,2,2,0,0,1,1", + "v4:256,16,4,128,16,16,16,1,2,2,0,0,1,1", + "v4:256,256,4,128,128,16,8,1,1,2,0,0,1,1", + "v4:256,32,2,256,32,16,8,1,2,2,0,0,1,1", + "v4:112,112,2,112,112,16,16,1,2,2,0,0,1,1", + "v4:128,256,2,32,256,16,8,1,1,2,0,0,1,1", + "v4:128,256,4,16,256,16,8,1,2,2,0,0,1,1", + "v4:16,80,4,16,80,16,16,1,1,2,0,0,1,1", + "v4:224,16,8,224,16,16,16,1,2,2,0,0,1,1", + "v4:240,192,4,240,48,16,16,1,2,2,0,0,1,1", + "v4:256,128,4,64,64,16,16,1,1,1,8,8,1,1", + "v4:256,128,8,32,128,16,4,1,2,2,0,0,1,1", + "v4:256,256,4,128,128,16,4,1,2,2,0,0,1,1", + "v4:256,256,8,64,128,16,16,1,1,2,0,0,1,1", + "v4:64,128,8,16,128,16,4,1,1,2,0,0,1,1", + "v4:64,64,8,64,64,16,8,1,2,2,0,0,1,1", + "v4:112,32,8,112,16,16,4,1,2,2,0,0,1,1", + "v4:192,32,8,96,16,16,16,1,2,1,0,8,1,1", + "v4:48,64,8,48,16,16,16,1,1,0,0,8,1,1", + "v4:64,256,8,64,256,16,16,1,1,2,0,0,1,1" }; // END_GEMM_Wmma_i8_gfx1201_DEFS // BEGIN_CONV_Wmma_f16_gfx1201_DEFS const StringRef PopulateParamsWmma::initParametersF16ConvGfx1201[] = { - "v4:128,128,4,128,32,16,8,1,1,2,0,0,1,1", - "v4:128,64,4,32,64,16,8,1,1,2,0,0,1,1", - "v4:128,64,8,32,64,16,8,1,1,2,0,0,1,1", - "v4:64,256,2,64,64,16,8,1,1,2,0,0,1,1", - "v4:128,32,8,32,32,16,8,1,1,2,0,0,1,1", - "v4:64,32,8,32,16,16,8,1,1,2,0,0,1,1", - "v4:128,128,2,128,32,16,8,1,1,2,0,0,1,1", + "v4:64,256,2,64,64,16,8,1,2,2,0,0,1,1", + "v4:160,128,8,80,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,4,64,64,16,8,1,1,2,0,0,1,1", "v4:128,128,8,32,64,16,8,1,1,2,0,0,1,1", - "v4:256,128,4,64,64,16,8,1,1,2,0,0,1,1", - "v4:64,128,2,64,32,16,8,1,1,2,0,0,1,1", - "v4:256,64,4,128,32,16,8,1,1,2,0,0,1,1", + "v4:128,32,8,64,16,16,8,1,1,2,0,0,1,1", + "v4:256,128,8,64,64,16,8,1,1,2,0,0,1,1", + "v4:64,256,4,64,32,16,8,1,1,2,0,0,1,1", + "v4:192,256,8,96,32,16,8,1,1,2,0,0,1,1", "v4:128,16,8,32,16,16,8,1,1,2,0,0,1,1", - "v4:256,128,8,128,32,16,8,1,1,2,0,0,1,1", - "v4:32,64,8,32,16,16,8,1,2,2,0,0,1,1", - "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,64,4,16,64,16,8,1,1,2,0,0,1,1", - "v4:32,128,4,32,32,16,8,1,2,2,0,0,1,1", - "v4:128,128,2,32,128,16,8,1,1,2,0,0,1,1", - "v4:64,128,4,16,128,16,8,1,1,2,0,0,1,1", + "v4:64,32,8,32,16,16,8,1,1,2,0,0,1,1", + "v4:128,64,4,32,64,16,8,1,2,2,0,0,1,1", + "v4:64,128,2,64,64,16,16,1,1,2,0,0,1,1", + "v4:128,128,4,64,64,16,8,1,2,2,0,0,1,1", + "v4:128,256,2,128,32,16,8,1,1,2,0,0,1,1", + "v4:256,128,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:64,64,8,32,32,16,8,1,2,2,0,0,1,1", + "v4:128,32,8,32,32,16,8,1,2,2,0,0,1,1", + "v4:192,256,4,192,32,16,8,1,1,2,0,0,1,1", + "v4:128,256,2,128,64,16,8,1,2,2,0,0,1,1", + "v4:64,128,4,32,128,16,4,1,1,2,0,0,1,1", + "v4:32,128,2,32,128,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,32,64,16,8,1,1,2,0,0,1,1", + "v4:128,128,4,32,128,16,8,1,2,2,0,0,1,1", + "v4:16,64,4,16,64,16,8,1,1,2,0,0,1,1", + "v4:64,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,128,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:128,16,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:32,256,2,32,32,16,8,1,2,2,0,0,1,1", + "v4:64,64,8,16,64,16,8,1,2,2,0,0,1,1", + "v4:128,128,4,128,32,16,8,1,2,2,0,0,1,1", + "v4:128,256,4,128,64,16,4,1,1,2,0,0,1,1", + "v4:256,128,8,32,128,16,8,1,1,2,0,0,1,1", + "v4:128,256,4,64,64,16,8,1,2,2,0,0,1,1", + "v4:256,128,4,32,128,16,8,1,1,2,0,0,1,1", + "v4:256,128,4,128,32,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,16,64,16,8,1,2,2,0,0,1,1", "v4:16,64,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:256,256,8,128,32,16,8,1,1,2,0,0,1,1", + "v4:32,64,8,32,16,16,8,1,2,2,0,0,1,1", "v4:32,64,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:128,256,8,64,64,16,8,1,1,2,0,0,1,1", - "v4:32,128,2,32,128,16,8,1,1,2,0,0,1,1", - "v4:64,128,8,32,32,16,8,1,2,2,0,0,1,1", - "v4:64,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:32,128,8,16,32,16,8,1,2,2,0,0,1,1", - "v4:32,64,8,32,32,16,8,1,2,2,0,0,1,1", - "v4:64,64,4,64,16,16,8,1,1,2,0,0,1,1", - "v4:16,32,4,16,16,16,8,1,2,2,0,0,1,1", - "v4:64,128,8,32,16,16,8,1,2,2,0,0,1,1", - "v4:32,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:64,64,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:32,32,4,16,16,16,16,1,2,2,0,0,1,1", - "v4:128,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,16,1,2,2,0,0,1,1" + "v4:64,128,8,64,16,16,8,1,2,2,0,0,1,1", + "v4:128,256,4,128,32,16,8,1,2,2,0,0,1,1", + "v4:128,256,4,64,128,16,4,1,2,2,0,0,1,1", + "v4:256,64,2,64,64,16,16,1,1,2,0,0,1,1", + "v4:64,32,8,64,16,16,8,1,1,2,0,0,1,1", + "v4:256,128,4,64,64,16,8,1,2,2,0,0,1,1", + "v4:64,128,2,32,64,16,16,1,2,2,0,0,1,1", + "v4:128,32,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,32,16,16,8,1,1,2,0,0,1,1", + "v4:128,128,2,128,32,16,16,1,2,2,0,0,1,1", + "v4:128,128,2,64,32,16,8,1,2,2,0,0,1,1", + "v4:128,64,8,32,32,16,8,1,2,2,0,0,1,1", + "v4:256,256,4,128,64,16,8,1,2,2,0,0,1,1", + "v4:256,256,4,64,128,16,4,1,2,2,0,0,1,1", + "v4:128,256,4,64,128,16,8,1,1,2,0,0,1,1", + "v4:16,64,4,16,64,16,16,1,1,2,0,0,1,1", + "v4:256,128,2,128,64,16,16,1,2,2,0,0,1,1", + "v4:32,48,8,16,48,16,8,1,2,2,0,0,1,1", + "v4:128,128,8,128,16,16,8,1,2,2,0,0,1,1", + "v4:128,256,8,32,128,16,4,1,2,2,0,0,1,1", + "v4:256,128,2,256,32,16,16,1,1,2,0,0,1,1", + "v4:256,128,4,64,128,16,4,1,2,2,0,0,1,1", + "v4:256,64,4,256,16,16,4,1,2,2,0,0,1,1", + "v4:128,128,4,32,128,16,16,1,1,2,0,0,1,1", + "v4:128,128,8,128,32,16,8,1,1,2,0,0,1,1", + "v4:128,256,4,128,64,16,8,1,2,2,0,0,1,1", + "v4:128,64,8,16,64,16,8,1,2,2,0,0,1,1", + "v4:128,64,8,32,64,16,8,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:160,80,2,160,80,16,8,1,1,2,0,0,1,1", + "v4:192,64,2,192,32,16,16,1,1,2,0,0,1,1", + "v4:256,32,8,32,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,32,128,16,4,1,2,2,0,0,1,1", + "v4:64,32,4,64,32,16,8,1,2,2,0,0,1,1", + "v4:128,64,8,128,16,16,4,1,2,2,0,0,1,1", + "v4:16,32,4,16,32,16,16,1,2,2,0,0,1,1", + "v4:256,128,8,64,64,16,4,1,2,2,0,0,1,1", + "v4:256,256,8,64,128,16,8,1,1,2,0,0,1,1", + "v4:16,128,8,16,64,16,8,1,2,2,0,0,1,1", + "v4:256,64,2,128,32,16,16,1,2,2,0,0,1,1", + "v4:32,32,4,32,32,16,16,1,1,2,0,0,1,1", + "v4:64,256,4,64,128,16,4,1,2,2,0,0,1,1", + "v4:64,256,4,64,128,16,8,1,1,2,0,0,1,1", + "v4:128,256,8,64,128,16,4,1,1,2,0,0,1,1", + "v4:16,48,8,16,48,16,8,1,2,2,0,0,1,1", + "v4:256,128,2,32,128,16,16,1,2,2,0,0,1,1", + "v4:256,48,8,16,48,16,8,1,1,1,4,64,1,1", + "v4:32,16,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:64,128,8,32,64,16,8,1,2,2,0,0,1,1", + "v4:80,64,8,80,16,16,8,1,1,0,2,16,1,1", + "v4:96,48,8,48,48,16,8,1,1,2,0,0,1,1", + "v4:128,64,4,128,64,16,4,1,1,2,0,0,1,1", + "v4:64,128,2,64,128,16,16,1,2,2,0,0,1,1", + "v4:64,64,8,32,32,16,16,1,2,2,0,0,1,1", + "v4:96,240,2,48,240,16,8,1,1,2,0,0,1,1", + "v4:128,160,8,32,160,16,4,1,1,2,0,0,1,1", + "v4:128,256,2,32,256,16,16,1,1,2,0,0,1,1", + "v4:16,256,4,16,256,16,4,1,1,2,0,0,1,1", + "v4:192,128,8,192,64,16,4,1,1,2,0,0,1,1", + "v4:256,64,8,64,64,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,32,32,16,16,1,1,2,0,0,1,1", + "v4:64,16,4,64,16,16,8,1,2,2,0,0,1,1", + "v4:128,32,4,64,32,16,16,1,1,2,0,0,1,1", + "v4:192,96,8,48,48,16,8,1,1,2,0,0,1,1", + "v4:32,32,4,32,16,16,16,1,2,2,0,0,1,1", + "v4:64,128,2,64,32,16,8,1,2,1,0,4,1,1", + "v4:64,192,2,64,96,16,16,1,2,2,0,0,1,1", + "v4:96,32,8,48,32,16,8,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:192,256,2,192,64,16,8,1,1,2,0,0,1,1", + "v4:224,32,2,112,32,16,16,1,2,2,0,0,1,1", + "v4:32,240,4,16,240,16,16,1,2,2,0,0,1,1", + "v4:64,16,8,32,16,16,16,1,2,2,0,0,1,1", + "v4:64,32,4,64,16,16,16,1,2,2,0,0,1,1", + "v4:128,128,4,128,64,16,16,1,2,2,0,0,1,1", + "v4:128,128,8,64,64,16,8,1,1,0,8,32,1,1", + "v4:128,160,4,32,160,16,8,1,1,2,0,0,1,1", + "v4:16,80,8,16,80,16,16,1,1,2,0,0,1,1", + "v4:192,32,8,96,16,16,16,1,2,2,0,0,1,1", + "v4:192,32,8,96,16,16,8,1,2,2,0,0,1,1", + "v4:256,64,8,256,16,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,32,128,16,16,1,1,2,0,0,1,1", + "v4:64,64,8,32,16,16,16,1,2,2,0,0,1,1", + "v4:96,64,4,48,16,16,16,1,2,2,0,0,1,1", + "v4:112,16,4,112,16,16,8,1,2,2,0,0,1,1", + "v4:128,128,4,128,64,16,16,1,1,2,0,0,1,1", + "v4:128,64,8,32,64,16,8,1,1,1,2,4,1,1", + "v4:128,64,8,64,64,16,16,1,1,2,0,0,1,1", + "v4:16,160,4,16,160,16,16,1,1,2,0,0,1,1", + "v4:160,160,2,80,160,16,16,1,1,0,4,4,1,1", + "v4:160,32,8,80,32,16,8,1,1,0,16,8,1,1", + "v4:32,128,8,32,16,16,8,1,2,1,0,4,1,1", + "v4:32,16,8,32,16,16,16,1,2,0,0,16,1,1", + "v4:32,256,4,32,256,16,8,1,1,2,0,0,1,1", + "v4:48,128,8,48,16,16,8,1,2,1,16,16,1,1", + "v4:64,128,8,16,64,16,16,1,1,2,0,0,1,1", + "v4:64,224,4,32,224,16,16,1,2,2,0,0,1,1", + "v4:64,256,2,64,128,16,8,1,1,1,0,4,1,1", + "v4:64,256,8,16,256,16,8,1,2,2,0,0,1,1", + "v4:64,64,8,16,16,16,8,1,2,0,2,16,1,1", + "v4:96,32,8,48,16,16,8,1,1,2,0,0,1,1", + "v4:96,32,8,96,32,16,16,1,2,2,0,0,1,1" }; // END_CONV_Wmma_f16_gfx1201_DEFS // BEGIN_CONV_Wmma_i8_gfx1201_DEFS const StringRef PopulateParamsWmma::initParametersI8ConvGfx1201[] = { - "v4:128,64,4,64,32,16,16,1,1,2,0,0,1,1", - "v4:256,64,4,32,64,16,16,1,2,2,0,0,1,1", - "v4:32,64,2,32,32,16,16,1,2,2,0,0,1,1", - "v4:64,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:128,256,4,64,64,16,16,1,1,2,0,0,1,1", + "v4:128,64,4,32,64,16,16,1,2,2,0,0,1,1", + "v4:256,32,8,32,32,16,16,1,1,2,0,0,1,1", + "v4:256,64,8,32,64,16,16,1,1,2,0,0,1,1", "v4:64,16,8,16,16,16,16,1,2,2,0,0,1,1", - "v4:128,16,8,32,16,16,16,1,1,2,0,0,1,1", - "v4:128,128,4,64,64,16,8,1,2,2,0,0,1,1", - "v4:128,128,4,64,64,16,16,1,1,2,0,0,1,1", - "v4:256,32,8,64,32,16,16,1,1,2,0,0,1,1", - "v4:32,256,2,32,128,16,8,1,1,2,0,0,1,1", - "v4:64,128,2,64,32,16,8,1,1,2,0,0,1,1", - "v4:256,128,8,32,128,16,8,1,1,2,0,0,1,1" + "v4:128,64,4,128,32,16,8,1,2,2,0,0,1,1", + "v4:256,128,8,32,64,16,16,1,1,2,0,0,1,1", + "v4:64,128,2,64,128,16,8,1,1,2,0,0,1,1", + "v4:64,128,4,32,128,16,8,1,1,2,0,0,1,1", + "v4:128,256,8,128,64,16,4,1,1,2,0,0,1,1", + "v4:64,128,8,32,32,16,4,1,2,2,0,0,1,1", + "v4:64,64,8,64,64,16,8,1,1,2,0,0,1,1", + "v4:96,128,4,96,32,16,16,1,2,2,0,0,1,1", + "v4:128,128,8,32,64,16,16,1,2,2,0,0,1,1", + "v4:128,256,4,128,64,16,8,1,2,2,0,0,1,1", + "v4:256,64,4,256,16,16,8,1,2,2,0,0,1,1", + "v4:256,64,8,128,16,16,16,1,1,2,0,0,1,1", + "v4:128,16,4,128,16,16,16,1,1,2,0,0,1,1", + "v4:128,32,8,128,16,16,16,1,1,2,0,0,1,1", + "v4:192,64,2,192,32,16,16,1,1,2,0,0,1,1", + "v4:256,256,4,256,64,16,8,1,2,2,0,0,1,1", + "v4:32,128,4,32,64,16,16,1,1,2,0,0,1,1", + "v4:128,256,8,64,128,16,4,1,1,2,0,0,1,1", + "v4:128,32,8,64,32,16,16,1,2,2,0,0,1,1", + "v4:160,64,4,160,32,16,8,1,2,2,0,0,1,1", + "v4:256,128,2,64,128,16,16,1,2,2,0,0,1,1", + "v4:256,128,8,256,32,16,4,1,2,2,0,0,1,1", + "v4:256,128,8,64,64,16,8,1,2,2,0,0,1,1", + "v4:64,64,4,64,64,16,16,1,2,2,0,0,1,1", + "v4:128,16,4,128,16,16,16,1,2,2,0,0,1,1", + "v4:128,16,4,64,16,16,16,1,2,2,0,0,1,1", + "v4:128,16,8,64,16,16,16,1,2,2,0,0,1,1", + "v4:128,256,4,64,128,16,16,1,1,2,0,0,1,1", + "v4:128,32,4,128,32,16,16,1,1,2,0,0,1,1", + "v4:128,32,8,128,16,16,8,1,1,2,0,0,1,1", + "v4:256,128,4,64,64,16,16,1,2,2,0,0,1,1", + "v4:256,32,8,256,16,16,16,1,2,2,0,0,1,1", + "v4:256,64,2,256,64,16,8,1,2,2,0,0,1,1", + "v4:64,64,8,32,64,16,16,1,1,2,0,0,1,1", + "v4:128,128,8,64,64,16,16,1,1,2,0,0,1,1", + "v4:128,96,8,32,48,16,16,1,1,2,0,0,1,1", + "v4:256,16,8,64,16,16,16,1,1,2,0,0,1,1", + "v4:256,256,8,128,128,16,4,1,1,2,0,0,1,1", + "v4:32,128,8,16,64,16,16,1,2,2,0,0,1,1", + "v4:64,256,4,32,128,16,16,1,1,2,0,0,1,1", + "v4:96,192,8,96,96,16,8,1,2,2,0,0,1,1", + "v4:128,256,2,32,128,16,8,1,2,2,0,0,1,1", + "v4:128,256,8,64,128,16,8,1,1,2,0,0,1,1", + "v4:128,96,8,32,96,16,4,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:192,48,4,192,48,16,4,1,2,2,0,0,1,1", + "v4:256,128,4,64,128,16,8,1,2,2,0,0,1,1", + "v4:256,16,8,128,16,16,8,1,1,2,0,0,1,1", + "v4:128,16,4,128,16,16,8,1,2,2,0,0,1,1", + "v4:128,256,8,128,64,16,8,1,2,0,4,0,1,1", + "v4:16,128,8,16,128,16,8,1,1,2,0,0,1,1", + "v4:16,256,4,16,64,16,16,1,2,0,8,16,1,1", + "v4:256,128,8,128,64,16,16,1,1,2,0,0,1,1", + "v4:256,16,8,256,16,16,16,1,2,2,0,0,1,1", + "v4:256,256,4,64,256,16,16,1,1,2,0,0,1,1", + "v4:64,128,4,64,128,16,16,1,1,2,0,0,1,1", + "v4:64,256,2,64,256,16,16,1,1,2,0,0,1,1" }; // END_CONV_Wmma_i8_gfx1201_DEFS @@ -1681,139 +2392,351 @@ const StringRef PopulateParamsWmma::initParametersI8ConvGfx1151[] = { const StringRef PopulateParamsWmma::initParametersF16GemmGfx1150[] = { "v4:64,128,8,32,64,16,8,1,1,2,0,0,1,1", "v4:128,256,8,64,64,16,8,1,1,2,0,0,1,1", - "v4:128,256,4,128,32,16,8,1,1,2,0,0,1,1", + "v4:128,64,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:64,256,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:32,256,4,16,128,16,8,1,1,2,0,0,1,1", "v4:64,128,4,32,64,16,8,1,1,2,0,0,1,1", - "v4:64,64,4,64,64,16,8,1,1,2,0,0,1,1", - "v4:64,256,8,16,128,16,8,1,1,2,0,0,1,1", - "v4:16,16,8,16,16,16,16,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:128,64,8,32,32,16,8,1,1,2,0,0,1,1", - "v4:128,256,2,128,32,16,8,1,1,2,0,0,1,1", - "v4:64,64,8,32,64,16,8,1,1,2,0,0,1,1", "v4:32,16,8,16,16,16,16,1,2,2,0,0,1,1", - "v4:32,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:256,128,4,128,64,16,4,1,1,2,0,0,1,1", - "v4:32,64,4,32,64,16,16,1,1,2,0,0,1,1", - "v4:256,128,4,64,64,16,16,1,1,2,0,0,1,1", - "v4:16,16,4,16,16,16,4,1,2,2,0,0,1,1", - "v4:128,64,4,32,32,16,4,1,1,2,0,0,1,1", - "v4:64,32,4,16,32,16,4,1,1,2,0,0,1,1", + "v4:64,32,4,64,32,16,8,1,1,2,0,0,1,1", + "v4:64,32,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:32,64,8,32,32,16,8,1,2,2,0,0,1,1", + "v4:128,128,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:128,128,2,128,32,16,8,1,1,2,0,0,1,1", + "v4:256,128,2,128,64,16,8,1,1,2,0,0,1,1", + "v4:128,256,4,128,32,16,16,1,1,2,0,0,1,1", + "v4:256,256,2,128,32,16,8,1,2,2,0,0,1,1", + "v4:256,256,2,256,32,16,8,1,1,2,0,0,1,1", "v4:128,128,8,64,32,16,16,1,1,2,0,0,1,1", - "v4:256,64,8,128,32,16,8,1,1,2,0,0,1,1" + "v4:256,128,4,128,32,16,16,1,1,2,0,0,1,1", + "v4:16,128,8,16,128,16,16,1,2,2,0,0,1,1", + "v4:256,64,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:16,128,8,16,128,16,16,1,1,2,0,0,1,1" }; // END_GEMM_Wmma_f16_gfx1150_DEFS // BEGIN_GEMM_Wmma_i8_gfx1150_DEFS const StringRef PopulateParamsWmma::initParametersI8GemmGfx1150[] = { + "v4:64,128,4,32,64,16,16,1,1,2,0,0,1,1", "v4:128,256,4,128,32,16,16,1,1,2,0,0,1,1", - "v4:128,64,2,64,32,16,16,1,2,2,0,0,1,1", - "v4:16,16,8,16,16,16,16,1,2,2,0,0,1,1", - "v4:128,64,8,128,16,16,16,1,1,2,0,0,1,1", - "v4:128,128,4,128,32,16,16,1,1,2,0,0,1,1", - "v4:32,256,2,32,64,16,16,1,1,2,0,0,1,1", - "v4:32,64,4,32,16,16,16,1,2,2,0,0,1,1", - "v4:64,128,2,64,32,16,16,1,2,2,0,0,1,1", - "v4:256,256,4,128,32,16,16,1,1,2,0,0,1,1", - "v4:32,64,8,16,64,16,16,1,1,2,0,0,1,1", - "v4:128,128,8,32,64,16,16,1,1,2,0,0,1,1", + "v4:128,128,4,64,64,16,16,1,1,2,0,0,1,1", + "v4:64,128,8,32,64,16,16,1,1,2,0,0,1,1", + "v4:256,64,4,256,16,16,16,1,1,2,0,0,1,1", + "v4:128,64,2,128,32,16,16,1,1,2,0,0,1,1", + "v4:256,64,8,128,16,16,16,1,1,2,0,0,1,1", + "v4:64,16,8,32,16,16,16,1,2,2,0,0,1,1", + "v4:32,64,4,32,64,16,8,1,1,2,0,0,1,1", + "v4:32,64,8,16,64,16,8,1,1,2,0,0,1,1", + "v4:64,256,2,64,64,16,16,1,1,2,0,0,1,1", + "v4:256,256,2,128,32,16,16,1,2,2,0,0,1,1", + "v4:128,64,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:128,64,8,16,32,16,16,1,2,2,0,0,1,1", "v4:64,256,8,16,128,16,16,1,1,2,0,0,1,1", - "v4:64,64,4,64,64,16,16,1,1,2,0,0,1,1", - "v4:256,32,4,128,16,16,16,1,1,2,0,0,1,1", - "v4:16,16,4,16,16,16,4,1,2,2,0,0,1,1", - "v4:256,64,4,128,16,16,16,1,1,2,0,0,1,1" + "v4:16,256,8,16,256,16,16,1,1,2,0,0,1,1", + "v4:16,256,8,16,256,16,16,1,2,2,0,0,1,1", + "v4:256,256,2,128,256,16,8,1,1,2,0,0,1,1", + "v4:256,256,2,256,128,16,16,1,2,2,0,0,1,1", + "v4:256,256,2,256,128,16,8,1,1,2,0,0,1,1", + "v4:256,256,4,128,256,16,16,1,2,2,0,0,1,1", + "v4:256,256,4,256,128,16,16,1,2,2,0,0,1,1" }; // END_GEMM_Wmma_i8_gfx1150_DEFS // BEGIN_CONV_Wmma_f16_gfx1150_DEFS const StringRef PopulateParamsWmma::initParametersF16ConvGfx1150[] = { - "v4:64,128,4,64,32,16,8,1,1,2,0,0,1,1", - "v4:128,64,8,128,16,16,8,1,1,2,0,0,1,1", - "v4:128,64,4,64,32,16,8,1,1,2,0,0,1,1", - "v4:64,256,2,64,64,16,8,1,1,2,0,0,1,1", "v4:128,128,4,128,32,16,8,1,1,2,0,0,1,1", - "v4:128,128,8,16,128,16,8,1,1,2,0,0,1,1", - "v4:64,256,4,64,32,16,8,1,1,2,0,0,1,1", - "v4:128,64,8,32,32,16,8,1,1,2,0,0,1,1", - "v4:32,64,4,32,64,16,4,1,1,2,0,0,1,1", - "v4:128,256,4,128,32,16,8,1,1,2,0,0,1,1", - "v4:128,128,2,128,32,16,16,1,1,2,0,0,1,1", - "v4:256,128,8,128,32,16,8,1,1,2,0,0,1,1", - "v4:128,64,2,128,32,16,8,1,1,2,0,0,1,1", - "v4:256,256,8,128,32,16,8,1,1,2,0,0,1,1", - "v4:64,128,8,64,32,16,8,1,1,2,0,0,1,1", - "v4:64,128,8,64,16,16,8,1,1,2,0,0,1,1", - "v4:32,128,4,32,64,16,8,1,1,2,0,0,1,1", - "v4:128,128,8,32,64,16,8,1,1,2,0,0,1,1", - "v4:256,128,8,64,32,16,8,1,1,2,0,0,1,1", - "v4:32,128,2,32,128,16,8,1,1,2,0,0,1,1", - "v4:64,128,2,64,32,16,8,1,2,2,0,0,1,1", - "v4:256,64,4,64,64,16,8,1,1,2,0,0,1,1", - "v4:64,128,4,64,16,16,8,1,2,2,0,0,1,1", - "v4:32,128,2,32,32,16,8,1,2,2,0,0,1,1", - "v4:128,64,4,128,32,16,4,1,1,2,0,0,1,1", - "v4:64,256,4,64,16,16,8,1,2,2,0,0,1,1", - "v4:256,128,8,64,64,16,8,1,1,2,0,0,1,1", - "v4:64,32,4,32,32,16,8,1,1,2,0,0,1,1", - "v4:16,128,4,16,128,16,8,1,1,2,0,0,1,1", - "v4:32,256,2,32,128,16,8,1,1,2,0,0,1,1", - "v4:128,64,2,32,32,16,8,1,1,2,0,0,1,1", - "v4:256,64,8,64,32,16,8,1,1,2,0,0,1,1", - "v4:32,32,4,32,16,16,8,1,2,2,0,0,1,1", - "v4:256,64,2,32,64,16,8,1,1,2,0,0,1,1", - "v4:64,64,8,16,64,16,8,1,2,2,0,0,1,1", "v4:64,32,8,32,32,16,8,1,1,2,0,0,1,1", - "v4:16,32,8,16,32,16,8,1,1,2,0,0,1,1", - "v4:64,64,8,16,32,16,8,1,2,2,0,0,1,1", - "v4:16,32,4,16,32,16,8,1,2,2,0,0,1,1", - "v4:128,256,8,128,32,16,8,1,1,2,0,0,1,1", - "v4:64,128,8,16,128,16,8,1,2,2,0,0,1,1", - "v4:32,32,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:256,32,4,64,32,16,8,1,1,2,0,0,1,1", - "v4:32,64,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:256,128,8,32,64,16,8,1,1,2,0,0,1,1", - "v4:32,128,8,32,32,16,8,1,2,2,0,0,1,1", - "v4:32,16,4,16,16,16,8,1,1,2,0,0,1,1", + "v4:64,64,2,64,64,16,8,1,1,2,0,0,1,1", + "v4:128,64,4,128,32,16,8,1,1,2,0,0,1,1", + "v4:128,32,8,32,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,32,64,16,8,1,1,2,0,0,1,1", + "v4:128,64,8,32,64,16,8,1,1,2,0,0,1,1", + "v4:128,64,4,64,32,16,16,1,1,2,0,0,1,1", + "v4:128,128,2,128,32,16,16,1,1,2,0,0,1,1", + "v4:128,64,2,128,32,16,16,1,1,2,0,0,1,1", + "v4:64,128,4,64,32,16,8,1,1,2,0,0,1,1", + "v4:128,128,2,64,64,16,8,1,2,2,0,0,1,1", + "v4:32,128,4,32,32,16,8,1,1,2,0,0,1,1", + "v4:256,64,2,64,64,16,16,1,1,2,0,0,1,1", + "v4:32,64,2,32,64,16,8,1,1,2,0,0,1,1", + "v4:32,128,8,16,32,16,8,1,1,2,0,0,1,1", + "v4:256,64,4,128,32,16,8,1,1,2,0,0,1,1", + "v4:64,64,8,32,32,16,4,1,2,2,0,0,1,1", + "v4:16,128,8,16,32,16,8,1,1,2,0,0,1,1", + "v4:256,128,4,128,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,2,32,64,16,8,1,1,2,0,0,1,1", + "v4:256,128,2,64,64,16,8,1,2,2,0,0,1,1", + "v4:128,256,4,64,64,16,8,1,1,2,0,0,1,1", + "v4:128,32,8,128,16,16,8,1,1,2,0,0,1,1", + "v4:64,16,8,32,16,16,8,1,1,2,0,0,1,1", + "v4:16,128,4,16,32,16,8,1,1,2,0,0,1,1", + "v4:64,256,2,64,64,16,16,1,1,2,0,0,1,1", + "v4:128,128,4,128,64,16,4,1,1,2,0,0,1,1", + "v4:32,128,2,32,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,2,64,64,16,16,1,1,2,0,0,1,1", + "v4:32,128,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:256,128,4,256,32,16,4,1,1,2,0,0,1,1", + "v4:128,128,8,64,64,16,4,1,1,2,0,0,1,1", "v4:32,32,8,16,16,16,8,1,2,2,0,0,1,1", - "v4:16,64,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,32,4,16,16,16,4,1,1,2,0,0,1,1", - "v4:256,32,8,32,32,16,8,1,1,2,0,0,1,1", - "v4:16,128,8,16,64,16,8,1,1,2,0,0,1,1", - "v4:64,32,8,32,16,16,16,1,1,2,0,0,1,1", - "v4:16,16,4,16,16,16,4,1,1,2,0,0,1,1", - "v4:128,32,4,16,32,16,16,1,1,2,0,0,1,1", - "v4:16,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:128,16,4,32,16,16,8,1,2,2,0,0,1,1", - "v4:64,16,8,32,16,16,16,1,1,2,0,0,1,1", - "v4:32,16,8,16,16,16,16,1,1,2,0,0,1,1" + "v4:128,128,2,128,64,16,16,1,1,2,0,0,1,1", + "v4:32,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:128,256,8,64,64,16,8,1,1,2,0,0,1,1", + "v4:128,64,8,32,32,16,4,1,1,2,0,0,1,1", + "v4:256,128,8,256,16,16,8,1,1,2,0,0,1,1", + "v4:64,64,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:16,64,8,16,32,16,8,1,2,2,0,0,1,1", + "v4:256,64,4,128,32,16,4,1,1,2,0,0,1,1", + "v4:128,256,2,32,256,16,8,1,1,2,0,0,1,1", + "v4:64,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:64,128,2,64,128,16,8,1,1,2,0,0,1,1", + "v4:256,128,8,256,32,16,8,1,1,2,0,0,1,1", + "v4:16,128,8,16,128,16,16,1,1,2,0,0,1,1", + "v4:128,32,8,128,32,16,16,1,2,2,0,0,1,1" }; // END_CONV_Wmma_f16_gfx1150_DEFS // BEGIN_CONV_Wmma_i8_gfx1150_DEFS const StringRef PopulateParamsWmma::initParametersI8ConvGfx1150[] = { - "v4:128,32,4,32,32,16,16,1,2,2,0,0,1,1", - "v4:16,128,4,16,128,16,4,1,1,2,0,0,1,1", - "v4:256,32,2,32,32,16,16,1,1,2,0,0,1,1", - "v4:64,128,4,32,64,16,8,1,1,2,0,0,1,1", - "v4:256,64,2,64,64,16,16,1,2,2,0,0,1,1", - "v4:64,64,4,16,64,16,16,1,1,2,0,0,1,1", - "v4:128,256,2,128,64,16,8,1,1,2,0,0,1,1", - "v4:128,64,8,128,32,16,4,1,2,2,0,0,1,1", - "v4:64,64,8,16,64,16,16,1,1,2,0,0,1,1", - "v4:128,64,4,64,64,16,4,1,1,2,0,0,1,1", - "v4:128,64,8,64,16,16,16,1,1,2,0,0,1,1", - "v4:128,128,2,128,32,16,8,1,2,2,0,0,1,1", - "v4:256,64,8,64,64,16,8,1,1,2,0,0,1,1", - "v4:256,32,4,64,32,16,16,1,1,2,0,0,1,1", - "v4:256,32,8,16,32,16,16,1,1,2,0,0,1,1", - "v4:32,256,4,32,128,16,4,1,2,2,0,0,1,1", + "v4:256,64,4,128,32,16,16,1,1,2,0,0,1,1", + "v4:256,64,8,64,32,16,16,1,1,2,0,0,1,1", + "v4:64,32,8,16,32,16,16,1,1,2,0,0,1,1", + "v4:128,128,4,128,32,16,4,1,1,2,0,0,1,1", + "v4:64,32,4,64,32,16,4,1,1,2,0,0,1,1", + "v4:64,32,8,16,32,16,16,1,2,2,0,0,1,1", + "v4:64,128,8,32,16,16,16,1,2,2,0,0,1,1", "v4:64,128,2,32,64,16,16,1,2,2,0,0,1,1", - "v4:128,16,4,16,16,16,8,1,1,2,0,0,1,1", - "v4:16,16,8,16,16,16,8,1,1,2,0,0,1,1", - "v4:256,16,8,16,16,16,16,1,1,2,0,0,1,1", - "v4:256,32,8,128,32,16,16,1,1,2,0,0,1,1" + "v4:32,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:64,128,4,32,32,16,8,1,1,2,0,0,1,1", + "v4:256,128,4,128,64,16,8,1,1,2,0,0,1,1", + "v4:256,32,8,32,32,16,16,1,1,2,0,0,1,1", + "v4:64,128,4,64,128,16,4,1,1,2,0,0,1,1", + "v4:256,16,8,32,16,16,16,1,1,2,0,0,1,1", + "v4:16,128,8,16,128,16,16,1,2,2,0,0,1,1", + "v4:16,256,4,16,256,16,16,1,1,2,0,0,1,1" }; // END_CONV_Wmma_i8_gfx1150_DEFS +// BEGIN_GEMM_Wmma_f16_gfx1101_DEFS +const StringRef PopulateParamsWmma::initParametersF16GemmGfx1101[] = { + "v4:128,64,4,64,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,4,64,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,4,32,64,16,8,1,1,2,0,0,1,1", + "v4:32,32,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:16,64,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:128,128,4,64,64,16,8,1,1,2,0,0,1,1", + "v4:32,64,8,32,16,16,8,1,2,2,0,0,1,1", + "v4:16,96,8,16,48,16,8,1,1,2,0,0,1,1", + "v4:192,256,8,96,32,16,8,1,1,2,0,0,1,1", + "v4:256,128,8,32,128,16,8,1,1,2,0,0,1,1", + "v4:32,256,2,32,64,16,8,1,1,2,0,0,1,1", + "v4:128,32,4,32,32,16,8,1,1,2,0,0,1,1", + "v4:64,256,2,64,64,16,8,1,1,2,0,0,1,1", + "v4:32,128,2,32,64,16,8,1,2,2,0,0,1,1", + "v4:16,256,4,16,64,16,8,1,2,2,0,0,1,1", + "v4:128,16,8,64,16,16,8,1,1,2,0,0,1,1", + "v4:64,256,2,32,128,16,8,1,2,2,0,0,1,1", + "v4:16,128,8,16,32,16,16,1,1,2,0,0,1,1", + "v4:96,128,2,96,32,16,8,1,2,2,0,0,1,1", + "v4:224,64,4,112,32,16,8,1,1,0,1,8,1,1", + "v4:96,48,8,48,48,16,8,1,1,0,16,8,1,1", + "v4:128,128,8,64,64,16,8,1,1,0,8,32,1,1", + "v4:128,64,2,32,64,16,16,1,2,2,0,0,1,1", + "v4:256,32,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:64,256,4,16,256,16,16,1,1,2,0,0,1,1", + "v4:128,128,8,64,64,16,8,1,1,0,16,0,1,1", + "v4:16,80,8,16,80,16,16,1,1,2,0,0,1,1", + "v4:192,64,8,48,32,16,8,1,1,1,4,0,1,1", + "v4:224,256,8,112,32,16,8,1,1,2,0,0,1,1", + "v4:224,64,8,112,32,16,8,1,1,1,8,64,1,1", + "v4:256,256,8,32,64,16,8,1,1,2,0,0,1,1", + "v4:256,64,8,64,64,16,4,1,1,2,0,0,1,1", + "v4:48,96,8,48,48,16,4,1,1,0,16,0,1,1", + "v4:64,128,8,32,128,16,8,1,1,2,0,0,1,1" +}; +// END_GEMM_Wmma_f16_gfx1101_DEFS + +// BEGIN_GEMM_Wmma_i8_gfx1101_DEFS +const StringRef PopulateParamsWmma::initParametersI8GemmGfx1101[] = { + "v4:48,64,4,48,16,16,16,1,1,2,0,0,1,1", + "v4:64,64,8,16,32,16,16,1,2,2,0,0,1,1", + "v4:128,64,4,32,64,16,16,1,1,2,0,0,1,1", + "v4:32,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:16,128,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,64,8,32,32,16,16,1,1,2,0,0,1,1", + "v4:128,128,2,32,64,16,16,1,1,2,0,0,1,1", + "v4:128,96,2,32,48,16,16,1,1,2,0,0,1,1", + "v4:64,128,2,64,64,16,16,1,1,2,0,0,1,1", + "v4:32,128,4,32,64,16,16,1,1,2,0,0,1,1", + "v4:144,64,4,144,16,16,8,1,1,2,0,0,1,1", + "v4:64,128,2,32,128,16,16,1,2,2,0,0,1,1", + "v4:64,256,8,16,128,16,8,1,2,2,0,0,1,1", + "v4:192,128,4,192,16,16,16,1,1,2,0,0,1,1", + "v4:256,32,8,128,16,16,16,1,1,2,0,0,1,1", + "v4:64,64,8,64,32,16,16,1,1,2,0,0,1,1", + "v4:256,32,4,32,32,16,16,1,1,0,16,16,1,1", + "v4:64,128,4,32,128,16,16,1,1,0,4,4,1,1" +}; +// END_GEMM_Wmma_i8_gfx1101_DEFS + +// BEGIN_CONV_Wmma_f16_gfx1101_DEFS +const StringRef PopulateParamsWmma::initParametersF16ConvGfx1101[] = { + "v4:64,128,4,64,32,16,8,1,1,2,0,0,1,1", + "v4:64,16,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:64,128,2,64,32,16,8,1,1,2,0,0,1,1", + "v4:32,64,4,32,32,16,8,1,1,2,0,0,1,1", + "v4:64,64,8,32,32,16,8,1,1,2,0,0,1,1", + "v4:128,64,4,128,16,16,8,1,1,2,0,0,1,1", + "v4:48,64,8,48,16,16,8,1,1,2,0,0,1,1", + "v4:128,32,8,16,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,2,64,32,16,8,1,2,2,0,0,1,1", + "v4:64,128,2,64,64,16,8,1,1,2,0,0,1,1", + "v4:16,16,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:128,128,2,128,32,16,8,1,1,2,0,0,1,1", + "v4:16,64,4,16,64,16,8,1,1,2,0,0,1,1", + "v4:32,64,8,32,16,16,8,1,1,2,0,0,1,1", + "v4:128,128,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:16,32,8,16,16,16,8,1,1,2,0,0,1,1", + "v4:64,32,2,32,32,16,8,1,2,2,0,0,1,1", + "v4:96,64,4,48,16,16,8,1,2,2,0,0,1,1", + "v4:128,64,4,32,64,16,8,1,2,2,0,0,1,1", + "v4:128,64,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:64,64,8,16,64,16,8,1,2,2,0,0,1,1", + "v4:224,64,4,112,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,4,16,128,16,8,1,1,2,0,0,1,1", + "v4:64,128,8,32,64,16,8,1,1,2,0,0,1,1", + "v4:128,128,4,128,32,16,8,1,1,2,0,0,1,1", + "v4:192,64,4,48,32,16,8,1,2,2,0,0,1,1", + "v4:64,16,8,16,16,16,8,1,1,0,16,32,1,1", + "v4:128,128,2,64,64,16,8,1,1,2,0,0,1,1", + "v4:80,64,8,80,16,16,8,1,1,2,0,0,1,1", + "v4:256,64,4,128,16,16,8,1,1,2,0,0,1,1", + "v4:32,64,8,16,16,16,8,1,2,2,0,0,1,1", + "v4:64,256,8,16,128,16,8,1,1,2,0,0,1,1", + "v4:32,32,8,32,32,16,4,1,2,2,0,0,1,1", + "v4:16,32,4,16,32,16,16,1,2,2,0,0,1,1", + "v4:32,128,4,32,128,16,4,1,1,2,0,0,1,1", + "v4:128,32,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:256,64,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:16,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:256,48,8,16,48,16,8,1,1,2,0,0,1,1", + "v4:192,96,2,48,96,16,8,1,1,2,0,0,1,1", + "v4:64,256,2,32,128,16,8,1,2,2,0,0,1,1", + "v4:64,256,8,64,32,16,8,1,1,2,0,0,1,1", + "v4:96,32,8,48,32,16,8,1,1,0,16,32,1,1", + "v4:128,64,4,32,16,16,8,1,1,2,0,0,1,1", + "v4:32,16,8,16,16,16,16,1,2,2,0,0,1,1", + "v4:64,32,8,32,16,16,8,1,1,0,16,32,1,1", + "v4:48,64,8,48,16,16,8,1,1,0,16,32,1,1", + "v4:128,32,4,32,32,16,16,1,2,2,0,0,1,1", + "v4:192,64,8,48,32,16,8,1,1,0,16,0,1,1", + "v4:192,64,8,48,32,16,8,1,1,0,16,16,1,1", + "v4:256,128,4,128,32,16,8,1,1,2,0,0,1,1", + "v4:128,128,2,128,32,16,8,1,1,0,8,32,1,1", + "v4:128,128,8,64,32,16,8,1,1,0,4,0,1,1", + "v4:128,16,4,64,16,16,8,1,2,2,0,0,1,1", + "v4:256,128,8,32,128,16,8,1,1,0,16,64,1,1", + "v4:32,80,8,16,80,16,4,1,2,2,0,0,1,1", + "v4:64,16,8,16,16,16,8,1,2,1,16,4,1,1", + "v4:80,64,8,80,16,16,8,1,1,0,16,8,1,1", + "v4:128,128,8,128,16,16,8,1,1,0,16,16,1,1", + "v4:128,128,8,128,16,16,8,1,1,0,16,32,1,1", + "v4:128,16,8,32,16,16,8,1,1,0,16,8,1,1", + "v4:128,256,8,128,32,16,8,1,1,2,0,0,1,1", + "v4:128,256,8,64,64,16,8,1,1,2,0,0,1,1", + "v4:128,32,8,16,32,16,8,1,1,0,16,32,1,1", + "v4:144,64,8,144,16,16,8,1,1,2,0,0,1,1", + "v4:192,64,8,48,32,16,8,1,1,0,16,64,1,1", + "v4:128,128,2,128,32,16,8,1,1,0,2,64,1,1", + "v4:128,128,2,128,32,16,8,1,1,0,4,64,1,1", + "v4:128,128,8,128,16,16,8,1,1,1,16,0,1,1", + "v4:128,64,4,128,16,16,16,1,1,2,0,0,1,1", + "v4:128,64,8,64,32,16,4,1,2,2,0,0,1,1", + "v4:16,160,4,16,160,16,8,1,1,2,0,0,1,1", + "v4:16,64,4,16,64,16,8,1,1,0,16,64,1,1", + "v4:256,128,4,64,64,16,16,1,1,2,0,0,1,1", + "v4:256,128,8,128,32,16,8,1,1,2,0,0,1,1", + "v4:256,128,8,32,128,16,8,1,1,0,16,0,1,1", + "v4:64,128,4,64,32,16,16,1,1,2,0,0,1,1", + "v4:64,64,8,64,32,16,8,1,1,0,8,16,1,1", + "v4:96,128,4,96,32,16,8,1,1,0,16,32,1,1", + "v4:96,32,2,96,32,16,8,1,1,2,0,0,1,1", + "v4:128,128,8,64,64,16,8,1,1,0,4,0,1,1", + "v4:128,128,8,64,64,16,8,1,1,1,0,16,1,1", + "v4:128,128,8,64,64,16,8,1,1,1,2,8,1,1", + "v4:128,32,8,32,32,16,8,1,1,1,16,16,1,1", + "v4:128,64,4,128,32,16,16,1,1,2,0,0,1,1", + "v4:16,128,8,16,128,16,4,1,1,2,0,0,1,1", + "v4:16,64,4,16,64,16,8,1,1,0,16,8,1,1", + "v4:160,128,8,80,32,16,8,1,1,0,16,0,1,1", + "v4:192,64,4,96,32,16,8,1,1,2,0,0,1,1", + "v4:256,256,8,128,32,16,8,1,1,2,0,0,1,1", + "v4:64,128,4,16,128,16,8,1,1,0,16,0,1,1", + "v4:64,128,8,32,32,16,8,1,1,0,8,64,1,1", + "v4:64,16,8,16,16,16,16,1,1,0,16,8,1,1", + "v4:64,256,4,64,64,16,4,1,1,0,16,64,1,1", + "v4:64,64,4,64,64,16,4,1,1,0,16,16,1,1", + "v4:96,64,8,48,32,16,8,1,1,0,16,64,1,1", + "v4:112,64,8,112,16,16,8,1,1,0,16,32,1,1", + "v4:128,128,2,128,32,16,16,1,1,0,16,16,1,1", + "v4:128,128,2,128,32,16,8,1,2,0,2,32,1,1", + "v4:128,128,4,32,64,16,8,1,1,1,8,4,1,1", + "v4:128,128,8,64,64,16,8,1,1,0,2,8,1,1", + "v4:128,128,8,64,64,16,8,1,1,1,0,32,1,1", + "v4:128,128,8,64,64,16,8,1,1,1,4,32,1,1", + "v4:128,64,2,64,64,16,16,1,1,0,8,16,1,1", + "v4:128,64,8,64,64,16,4,1,2,2,0,0,1,1", + "v4:128,96,8,32,96,16,8,1,1,0,16,4,1,1", + "v4:16,224,4,16,224,16,4,1,1,2,0,0,1,1", + "v4:16,64,4,16,64,16,8,1,1,0,0,0,1,1", + "v4:160,128,8,80,32,16,8,1,1,0,16,64,1,1", + "v4:160,128,8,80,32,16,8,1,1,0,4,64,1,1", + "v4:192,64,4,96,32,16,16,1,1,0,16,8,1,1", + "v4:256,128,4,64,64,16,16,1,1,0,2,64,1,1", + "v4:256,128,4,64,64,16,16,1,1,0,8,16,1,1", + "v4:256,128,8,32,128,16,8,1,1,1,16,8,1,1", + "v4:32,256,4,32,128,16,8,1,2,2,0,0,1,1", + "v4:32,64,4,32,16,16,4,1,1,0,8,32,1,1", + "v4:48,48,2,48,48,16,8,1,1,1,0,64,1,1", + "v4:64,112,8,16,112,16,8,1,1,0,16,32,1,1", + "v4:64,128,2,64,64,16,8,1,1,0,16,0,1,1", + "v4:64,128,4,16,64,16,16,1,2,2,0,0,1,1", + "v4:64,128,4,64,16,16,16,1,2,2,0,0,1,1", + "v4:64,128,8,32,64,16,16,1,1,2,0,0,1,1", + "v4:64,256,4,16,256,16,16,1,1,2,0,0,1,1", + "v4:64,256,8,64,32,16,8,1,1,0,1,32,1,1", + "v4:64,48,4,16,48,16,8,1,2,1,16,4,1,1", + "v4:64,96,4,32,48,16,8,1,1,0,16,64,1,1", + "v4:96,192,2,48,48,16,8,1,1,0,1,64,1,1" +}; +// END_CONV_Wmma_f16_gfx1101_DEFS + +// BEGIN_CONV_Wmma_i8_gfx1101_DEFS +const StringRef PopulateParamsWmma::initParametersI8ConvGfx1101[] = { + "v4:256,32,4,64,32,16,16,1,1,2,0,0,1,1", + "v4:64,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:128,16,8,16,16,16,16,1,1,2,0,0,1,1", + "v4:128,32,8,32,16,16,16,1,1,2,0,0,1,1", + "v4:128,64,4,32,64,16,16,1,1,2,0,0,1,1", + "v4:64,128,2,64,32,16,8,1,2,2,0,0,1,1", + "v4:64,128,4,64,64,16,4,1,1,2,0,0,1,1", + "v4:96,64,4,48,16,16,16,1,1,2,0,0,1,1", + "v4:256,64,4,128,32,16,16,1,1,2,0,0,1,1", + "v4:128,128,4,64,64,16,4,1,1,2,0,0,1,1", + "v4:128,128,4,64,64,16,16,1,1,2,0,0,1,1", + "v4:32,64,4,32,64,16,8,1,1,2,0,0,1,1", + "v4:128,128,8,64,64,16,8,1,2,2,0,0,1,1", + "v4:128,256,4,64,16,16,8,1,2,2,0,0,1,1", + "v4:256,64,2,256,32,16,16,1,2,2,0,0,1,1", + "v4:32,256,4,16,256,16,16,1,2,2,0,0,1,1", + "v4:64,256,4,32,128,16,16,1,1,2,0,0,1,1", + "v4:16,128,8,16,128,16,16,1,2,2,0,0,1,1", + "v4:16,16,8,16,16,16,4,1,2,1,8,4,1,1", + "v4:256,128,8,128,128,16,8,1,1,2,0,0,1,1", + "v4:32,64,8,32,64,16,16,1,2,2,0,0,1,1" +}; +// END_CONV_Wmma_i8_gfx1101_DEFS + // BEGIN_GEMM_Wmma_f16_gfx1152_DEFS const StringRef PopulateParamsWmma::initParametersF16GemmGfx1152[] = { "v4:256,128,8,64,64,16,8,1,1,2,0,0,1,1", @@ -2139,21 +3062,11 @@ static constexpr size_t nInitParametersF16GemmGfx1000 = 17; static const StringRef initParametersF16GemmGfx1000[nInitParametersF16GemmGfx1000]; // END_GEMM_Wmma_f16_gfx1000_DECS -// BEGIN_GEMM_Wmma_f16_gfx1100_DECS -static constexpr size_t nInitParametersF16GemmGfx1100 = 21; -static const StringRef initParametersF16GemmGfx1100[nInitParametersF16GemmGfx1100]; -// END_GEMM_Wmma_f16_gfx1100_DECS - // BEGIN_CONV_Wmma_f16_gfx1000_DECS static constexpr size_t nInitParametersF16ConvGfx1000 = 26; static const StringRef initParametersF16ConvGfx1000[nInitParametersF16ConvGfx1000]; // END_CONV_Wmma_f16_gfx1000_DECS -// BEGIN_CONV_Wmma_f16_gfx1100_DECS -static constexpr size_t nInitParametersF16ConvGfx1100 = 27; -static const StringRef initParametersF16ConvGfx1100[nInitParametersF16ConvGfx1100]; -// END_CONV_Wmma_f16_gfx1100_DECS - // BEGIN_GEMM_Wmma_fp8_gfx1000_DECS static constexpr size_t nInitParametersFp8GemmGfx1000 = 18; static const StringRef initParametersFp8GemmGfx1000[nInitParametersFp8GemmGfx1000]; @@ -2169,38 +3082,28 @@ static constexpr size_t nInitParametersI8GemmGfx1000 = 15; static const StringRef initParametersI8GemmGfx1000[nInitParametersI8GemmGfx1000]; // END_GEMM_Wmma_i8_gfx1000_DECS -// BEGIN_GEMM_Wmma_i8_gfx1100_DECS -static constexpr size_t nInitParametersI8GemmGfx1100 = 9; -static const StringRef initParametersI8GemmGfx1100[nInitParametersI8GemmGfx1100]; -// END_GEMM_Wmma_i8_gfx1100_DECS - // BEGIN_CONV_Wmma_i8_gfx1000_DECS static constexpr size_t nInitParametersI8ConvGfx1000 = 11; static const StringRef initParametersI8ConvGfx1000[nInitParametersI8ConvGfx1000]; // END_CONV_Wmma_i8_gfx1000_DECS -// BEGIN_CONV_Wmma_i8_gfx1100_DECS -static constexpr size_t nInitParametersI8ConvGfx1100 = 13; -static const StringRef initParametersI8ConvGfx1100[nInitParametersI8ConvGfx1100]; -// END_CONV_Wmma_i8_gfx1100_DECS - // BEGIN_GEMM_Wmma_f16_gfx1201_DECS -static constexpr size_t nInitParametersF16GemmGfx1201 = 14; +static constexpr size_t nInitParametersF16GemmGfx1201 = 65; static const StringRef initParametersF16GemmGfx1201[nInitParametersF16GemmGfx1201]; // END_GEMM_Wmma_f16_gfx1201_DECS // BEGIN_GEMM_Wmma_i8_gfx1201_DECS -static constexpr size_t nInitParametersI8GemmGfx1201 = 13; +static constexpr size_t nInitParametersI8GemmGfx1201 = 59; static const StringRef initParametersI8GemmGfx1201[nInitParametersI8GemmGfx1201]; // END_GEMM_Wmma_i8_gfx1201_DECS // BEGIN_CONV_Wmma_f16_gfx1201_DECS -static constexpr size_t nInitParametersF16ConvGfx1201 = 36; +static constexpr size_t nInitParametersF16ConvGfx1201 = 141; static const StringRef initParametersF16ConvGfx1201[nInitParametersF16ConvGfx1201]; // END_CONV_Wmma_f16_gfx1201_DECS // BEGIN_CONV_Wmma_i8_gfx1201_DECS -static constexpr size_t nInitParametersI8ConvGfx1201 = 12; +static constexpr size_t nInitParametersI8ConvGfx1201 = 62; static const StringRef initParametersI8ConvGfx1201[nInitParametersI8ConvGfx1201]; // END_CONV_Wmma_i8_gfx1201_DECS @@ -2230,20 +3133,40 @@ static const StringRef initParametersF16GemmGfx1150[nInitParametersF16GemmGfx115 // END_GEMM_Wmma_f16_gfx1150_DECS // BEGIN_GEMM_Wmma_i8_gfx1150_DECS -static constexpr size_t nInitParametersI8GemmGfx1150 = 16; +static constexpr size_t nInitParametersI8GemmGfx1150 = 22; static const StringRef initParametersI8GemmGfx1150[nInitParametersI8GemmGfx1150]; // END_GEMM_Wmma_i8_gfx1150_DECS // BEGIN_CONV_Wmma_f16_gfx1150_DECS -static constexpr size_t nInitParametersF16ConvGfx1150 = 59; +static constexpr size_t nInitParametersF16ConvGfx1150 = 48; static const StringRef initParametersF16ConvGfx1150[nInitParametersF16ConvGfx1150]; // END_CONV_Wmma_f16_gfx1150_DECS // BEGIN_CONV_Wmma_i8_gfx1150_DECS -static constexpr size_t nInitParametersI8ConvGfx1150 = 21; +static constexpr size_t nInitParametersI8ConvGfx1150 = 16; static const StringRef initParametersI8ConvGfx1150[nInitParametersI8ConvGfx1150]; // END_CONV_Wmma_i8_gfx1150_DECS +// BEGIN_GEMM_Wmma_f16_gfx1101_DECS +static constexpr size_t nInitParametersF16GemmGfx1101 = 35; +static const StringRef initParametersF16GemmGfx1101[nInitParametersF16GemmGfx1101]; +// END_GEMM_Wmma_f16_gfx1101_DECS + +// BEGIN_GEMM_Wmma_i8_gfx1101_DECS +static constexpr size_t nInitParametersI8GemmGfx1101 = 18; +static const StringRef initParametersI8GemmGfx1101[nInitParametersI8GemmGfx1101]; +// END_GEMM_Wmma_i8_gfx1101_DECS + +// BEGIN_CONV_Wmma_f16_gfx1101_DECS +static constexpr size_t nInitParametersF16ConvGfx1101 = 128; +static const StringRef initParametersF16ConvGfx1101[nInitParametersF16ConvGfx1101]; +// END_CONV_Wmma_f16_gfx1101_DECS + +// BEGIN_CONV_Wmma_i8_gfx1101_DECS +static constexpr size_t nInitParametersI8ConvGfx1101 = 21; +static const StringRef initParametersI8ConvGfx1101[nInitParametersI8ConvGfx1101]; +// END_CONV_Wmma_i8_gfx1101_DECS + // BEGIN_GEMM_Wmma_f16_gfx1152_DECS static constexpr size_t nInitParametersF16GemmGfx1152 = 42; static const StringRef initParametersF16GemmGfx1152[nInitParametersF16GemmGfx1152]; @@ -3025,12 +3948,8 @@ static const StringRef initParametersI8AttentionGfx1152[nInitParametersI8Attenti {"gfx1000_gemm_f32", {PopulateParams::initParametersF32GemmGfx1000, PopulateParams::nInitParametersF32GemmGfx1000}}, -{"gfx1100_gemm_f32", {PopulateParams::initParametersF32GemmGfx1100, PopulateParams::nInitParametersF32GemmGfx1100}}, - {"gfx1000_conv_f32", {PopulateParams::initParametersF32ConvGfx1000, PopulateParams::nInitParametersF32ConvGfx1000}}, -{"gfx1100_conv_f32", {PopulateParams::initParametersF32ConvGfx1100, PopulateParams::nInitParametersF32ConvGfx1100}}, - {"gfx1201_gemm_f32", {PopulateParams::initParametersF32GemmGfx1201, PopulateParams::nInitParametersF32GemmGfx1201}}, {"gfx1201_conv_f32", {PopulateParams::initParametersF32ConvGfx1201, PopulateParams::nInitParametersF32ConvGfx1201}}, @@ -3043,6 +3962,10 @@ static const StringRef initParametersI8AttentionGfx1152[nInitParametersI8Attenti {"gfx1150_conv_f32", {PopulateParams::initParametersF32ConvGfx1150, PopulateParams::nInitParametersF32ConvGfx1150}}, +{"gfx1101_gemm_f32", {PopulateParams::initParametersF32GemmGfx1101, PopulateParams::nInitParametersF32GemmGfx1101}}, + +{"gfx1101_conv_f32", {PopulateParams::initParametersF32ConvGfx1101, PopulateParams::nInitParametersF32ConvGfx1101}}, + {"gfx1152_gemm_f32", {PopulateParams::initParametersF32GemmGfx1152, PopulateParams::nInitParametersF32GemmGfx1152}}, {"gfx1152_conv_f32", {PopulateParams::initParametersF32ConvGfx1152, PopulateParams::nInitParametersF32ConvGfx1152}}, @@ -3111,24 +4034,16 @@ static const StringRef initParametersI8AttentionGfx1152[nInitParametersI8Attenti {"gfx1000_gemm_f16", {PopulateParamsWmma::initParametersF16GemmGfx1000, PopulateParamsWmma::nInitParametersF16GemmGfx1000}}, -{"gfx1100_gemm_f16", {PopulateParamsWmma::initParametersF16GemmGfx1100, PopulateParamsWmma::nInitParametersF16GemmGfx1100}}, - {"gfx1000_conv_f16", {PopulateParamsWmma::initParametersF16ConvGfx1000, PopulateParamsWmma::nInitParametersF16ConvGfx1000}}, -{"gfx1100_conv_f16", {PopulateParamsWmma::initParametersF16ConvGfx1100, PopulateParamsWmma::nInitParametersF16ConvGfx1100}}, - {"gfx1000_gemm_fp8", {PopulateParamsWmma::initParametersFp8GemmGfx1000, PopulateParamsWmma::nInitParametersFp8GemmGfx1000}}, {"gfx1000_conv_fp8", {PopulateParamsWmma::initParametersFp8ConvGfx1000, PopulateParamsWmma::nInitParametersFp8ConvGfx1000}}, {"gfx1000_gemm_i8", {PopulateParamsWmma::initParametersI8GemmGfx1000, PopulateParamsWmma::nInitParametersI8GemmGfx1000}}, -{"gfx1100_gemm_i8", {PopulateParamsWmma::initParametersI8GemmGfx1100, PopulateParamsWmma::nInitParametersI8GemmGfx1100}}, - {"gfx1000_conv_i8", {PopulateParamsWmma::initParametersI8ConvGfx1000, PopulateParamsWmma::nInitParametersI8ConvGfx1000}}, -{"gfx1100_conv_i8", {PopulateParamsWmma::initParametersI8ConvGfx1100, PopulateParamsWmma::nInitParametersI8ConvGfx1100}}, - {"gfx1201_gemm_f16", {PopulateParamsWmma::initParametersF16GemmGfx1201, PopulateParamsWmma::nInitParametersF16GemmGfx1201}}, {"gfx1201_gemm_i8", {PopulateParamsWmma::initParametersI8GemmGfx1201, PopulateParamsWmma::nInitParametersI8GemmGfx1201}}, @@ -3171,12 +4086,8 @@ static const StringRef initParametersI8AttentionGfx1152[nInitParametersI8Attenti {"gfx1000_gemm_bf16", {PopulateParamsWmma::initParametersF16GemmGfx1000, PopulateParamsWmma::nInitParametersF16GemmGfx1000}}, // alias -> f16 -{"gfx1100_gemm_bf16", {PopulateParamsWmma::initParametersF16GemmGfx1100, PopulateParamsWmma::nInitParametersF16GemmGfx1100}}, // alias -> f16 - {"gfx1000_conv_bf16", {PopulateParamsWmma::initParametersF16ConvGfx1000, PopulateParamsWmma::nInitParametersF16ConvGfx1000}}, // alias -> f16 -{"gfx1100_conv_bf16", {PopulateParamsWmma::initParametersF16ConvGfx1100, PopulateParamsWmma::nInitParametersF16ConvGfx1100}}, // alias -> f16 - {"gfx1201_gemm_bf16", {PopulateParamsWmma::initParametersF16GemmGfx1201, PopulateParamsWmma::nInitParametersF16GemmGfx1201}}, // alias -> f16 {"gfx1201_conv_bf16", {PopulateParamsWmma::initParametersF16ConvGfx1201, PopulateParamsWmma::nInitParametersF16ConvGfx1201}}, // alias -> f16 @@ -3189,6 +4100,18 @@ static const StringRef initParametersI8AttentionGfx1152[nInitParametersI8Attenti {"gfx1150_conv_bf16", {PopulateParamsWmma::initParametersF16ConvGfx1150, PopulateParamsWmma::nInitParametersF16ConvGfx1150}}, // alias -> f16 +{"gfx1101_gemm_f16", {PopulateParamsWmma::initParametersF16GemmGfx1101, PopulateParamsWmma::nInitParametersF16GemmGfx1101}}, + +{"gfx1101_gemm_i8", {PopulateParamsWmma::initParametersI8GemmGfx1101, PopulateParamsWmma::nInitParametersI8GemmGfx1101}}, + +{"gfx1101_conv_f16", {PopulateParamsWmma::initParametersF16ConvGfx1101, PopulateParamsWmma::nInitParametersF16ConvGfx1101}}, + +{"gfx1101_conv_i8", {PopulateParamsWmma::initParametersI8ConvGfx1101, PopulateParamsWmma::nInitParametersI8ConvGfx1101}}, + +{"gfx1101_gemm_bf16", {PopulateParamsWmma::initParametersF16GemmGfx1101, PopulateParamsWmma::nInitParametersF16GemmGfx1101}}, // alias -> f16 + +{"gfx1101_conv_bf16", {PopulateParamsWmma::initParametersF16ConvGfx1101, PopulateParamsWmma::nInitParametersF16ConvGfx1101}}, // alias -> f16 + {"gfx1152_gemm_f16", {PopulateParamsWmma::initParametersF16GemmGfx1152, PopulateParamsWmma::nInitParametersF16GemmGfx1152}}, {"gfx1152_gemm_i8", {PopulateParamsWmma::initParametersI8GemmGfx1152, PopulateParamsWmma::nInitParametersI8GemmGfx1152}}, diff --git a/mlir/test/CAPI/mixr_full.c b/mlir/test/CAPI/mixr_full.c index 847a990026ee..30117edc27ac 100644 --- a/mlir/test/CAPI/mixr_full.c +++ b/mlir/test/CAPI/mixr_full.c @@ -194,7 +194,7 @@ static bool constructAndTraverseIr(MlirContext ctx) { mlirRockTuningSpaceCreate(module, RocmlirTuningParamSetKindFull); printf("Got tuning space,\n"); unsigned fNum = mlirRockTuningGetNumParams(tuningSpace); - // CHECK: full set = 932 + // CHECK: full set = 937 printf("full set = %u\n", fNum); MlirRockTuningParam tuningParam = mlirRockTuningParamCreate(); MlirRockTuningTable tuningTable = mlirRockTuningTableCreate(); diff --git a/mlir/test/Dialect/Rock/affix_tuning_params.mlir b/mlir/test/Dialect/Rock/affix_tuning_params.mlir index 7610a0d05710..eef5f7ad621d 100644 --- a/mlir/test/Dialect/Rock/affix_tuning_params.mlir +++ b/mlir/test/Dialect/Rock/affix_tuning_params.mlir @@ -10,7 +10,7 @@ // GRID-LABEL: rock_conv func.func @rock_conv(%filter : memref<1x128x8x3x3xf32>, %input : memref<128x1x8x32x32xf32>, %output : memref<128x1x128x30x30xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906"} { // CHECK: rock.conv - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm // GRID-SAME: gridSize = 1800 rock.conv(%filter, %input, %output) features = none { @@ -28,7 +28,7 @@ func.func @rock_conv(%filter : memref<1x128x8x3x3xf32>, %input : memref<128x1x8x // GRID-LABEL: rock_conv_schedulev2 func.func @rock_conv_schedulev2(%filter : memref<1x128x8x3x3xf32>, %input : memref<128x1x8x32x32xf32>, %output : memref<128x1x128x30x30xf32>) attributes {schedule_version = #rock.schedule_version<2>, arch = "amdgcn-amd-amdhsa:gfx906"} { // CHECK: rock.conv - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm // GRID-SAME: gridSize = 1800 rock.conv(%filter, %input, %output) features = none { @@ -46,7 +46,7 @@ func.func @rock_conv_schedulev2(%filter : memref<1x128x8x3x3xf32>, %input : memr // GRID-LABEL: func.func @rock_conv_f16 func.func @rock_conv_f16(%filter : memref<1x128x8x3x3xf16>, %input : memref<128x1x8x32x32xf16>, %output : memref<128x1x128x30x30xf16>) attributes {arch = "amdgcn-amd-amdhsa:gfx906"} { // CHECK: rock.conv - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm // GRID-SAME: gridSize = 1800 rock.conv(%filter, %input, %output) features = none { @@ -64,10 +64,10 @@ func.func @rock_conv_f16(%filter : memref<1x128x8x3x3xf16>, %input : memref<128x // GRID-LABEL: func.func @rock_conv_i8 func.func @rock_conv_i8(%filter : memref<1x128x8x3x3xi8>, %input : memref<128x1x8x32x32xi8>, %output : memref<128x1x128x30x30xi32>) attributes {arch = "amdgcn-amd-amdhsa:gfx908"} { // CHECK: rock.conv - // CHECK-SAME: derivedBlockSize = 64 - // CHECK-SAME: params = #rock.accel_gemm_params + // CHECK-SAME: derivedBlockSize = 256 + // CHECK-SAME: params = #rock.accel_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 3600 + // GRID-SAME: gridSize = 900 rock.conv(%filter, %input, %output) features = mfma|dot|atomic_add|atomic_add_f16 { filter_layout = ["g", "k", "c", "0", "1"], input_layout = ["ni", "gi", "ci", "0i", "1i"], @@ -84,9 +84,9 @@ func.func @rock_conv_i8(%filter : memref<1x128x8x3x3xi8>, %input : memref<128x1x func.func @rock_conv_bwd_data(%filter: memref<1x1024x1024x1x1xf32>, %input: memref<128x1x1024x14x14xf32>, %output: memref<128x1x1024x14x14xf32>) attributes {kernel = 0 : i32, arch = "amdgcn-amd-amdhsa:gfx908"} { // CHECK: rock.conv_bwd_data // CHECK-SAME: derivedBlockSize = 256 - // CHECK-SAME: params = #rock.accel_gemm_params + // CHECK-SAME: params = #rock.accel_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 25088 + // GRID-SAME: gridSize = 6272 rock.conv_bwd_data(%filter, %input, %output) features = mfma|dot|atomic_add|atomic_add_f16 { dilations = [1 : index, 1 : index], filter_layout = ["g", "k", "c", "0", "1"], @@ -105,9 +105,9 @@ func.func @rock_conv_bwd_data(%filter: memref<1x1024x1024x1x1xf32>, %input: memr func.func @rock_conv_bwd_data_f16(%filter: memref<1x1024x1024x1x1xf16>, %input: memref<128x1x1024x14x14xf16>, %output: memref<128x1x1024x14x14xf16>) attributes {kernel = 0 : i32, arch = "amdgcn-amd-amdhsa:gfx908"} { // CHECK: rock.conv_bwd_data // CHECK-SAME: derivedBlockSize = 256 - // CHECK-SAME: params = #rock.accel_gemm_params + // CHECK-SAME: params = #rock.accel_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 25088 + // GRID-SAME: gridSize = 12544 rock.conv_bwd_data(%filter, %input, %output) features = mfma|dot|atomic_add|atomic_add_f16 { dilations = [1 : index, 1 : index], filter_layout = ["g", "k", "c", "0", "1"], @@ -125,9 +125,9 @@ func.func @rock_conv_bwd_data_f16(%filter: memref<1x1024x1024x1x1xf16>, %input: // GRID-LABEL: func.func @rock_conv_bwd_data_padMN func.func @rock_conv_bwd_data_padMN(%filter : memref<1x64x3x1x1xf32>, %input : memref<11x1x3x15x15xf32>, %output : memref<11x1x64x15x15xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906"} { // CHECK: rock.conv_bwd_data - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 39 + // GRID-SAME: gridSize = 78 rock.conv_bwd_data(%filter, %input, %output) features = none { filter_layout = ["g", "k", "c", "0", "1"], input_layout = ["ni", "gi", "ci", "0i", "1i"], @@ -145,9 +145,9 @@ func.func @rock_conv_bwd_data_padMN(%filter : memref<1x64x3x1x1xf32>, %input : m // GRID-LABEL: @rock_conv_bwd_data_padMK func.func @rock_conv_bwd_data_padMK(%filter : memref<1x11x3x1x1xf32>, %input : memref<128x1x3x15x15xf32>, %output : memref<128x1x11x15x15xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906"} { // CHECK: rock.conv_bwd_data - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 225 + // GRID-SAME: gridSize = 450 rock.conv_bwd_data(%filter, %input, %output) features = none { filter_layout = ["g", "k", "c", "0", "1"], input_layout = ["ni", "gi", "ci", "0i", "1i"], @@ -165,9 +165,9 @@ func.func @rock_conv_bwd_data_padMK(%filter : memref<1x11x3x1x1xf32>, %input : m // GRID-LABEL: @rock_conv_bwd_weight func.func @rock_conv_bwd_weight(%filter : memref<1x128x8x3x3xf32>, %input : memref<128x1x8x32x32xf32>, %output : memref<128x1x128x30x30xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906", numCU = 64 : i32} { // CHECK: rock.conv_bwd_weight - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 6 + // GRID-SAME: gridSize = 12 rock.conv_bwd_weight(%filter, %input, %output) features = none { filter_layout = ["g", "k", "c", "0", "1"], input_layout = ["ni", "gi", "ci", "0i", "1i"], @@ -183,9 +183,9 @@ func.func @rock_conv_bwd_weight(%filter : memref<1x128x8x3x3xf32>, %input : memr // GRID-LABEL: @rock_conv_bwd_weight_f16 func.func @rock_conv_bwd_weight_f16(%filter : memref<1x128x8x3x3xf16>, %input : memref<128x1x8x32x32xf16>, %output : memref<128x1x128x30x30xf16>) attributes {arch = "amdgcn-amd-amdhsa:gfx906", numCU = 64 : i32} { // CHECK: rock.conv_bwd_weight - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 6 + // GRID-SAME: gridSize = 12 rock.conv_bwd_weight(%filter, %input, %output) features = none { filter_layout = ["g", "k", "c", "0", "1"], input_layout = ["ni", "gi", "ci", "0i", "1i"], @@ -201,7 +201,7 @@ func.func @rock_conv_bwd_weight_f16(%filter : memref<1x128x8x3x3xf16>, %input : // GRID-LABEL: func.func @rock_conv_bwd_weight_padALL func.func @rock_conv_bwd_weight_padALL(%filter : memref<1x20x8x3x3xf32>, %input : memref<7x1x8x32x32xf32>, %output : memref<7x1x20x30x30xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906", numCU = 64 : i32} { // CHECK: rock.conv_bwd_weight - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm // GRID-SAME: gridSize = 3 rock.conv_bwd_weight(%filter, %input, %output) features = none { @@ -219,7 +219,7 @@ func.func @rock_conv_bwd_weight_padALL(%filter : memref<1x20x8x3x3xf32>, %input // GRID-LABEL: @rock_conv_bwd_weight_padALL_f16 func.func @rock_conv_bwd_weight_padALL_f16(%filter : memref<1x20x8x3x3xf16>, %input : memref<7x1x8x32x32xf16>, %output : memref<7x1x20x30x30xf16>) attributes {arch = "amdgcn-amd-amdhsa:gfx906", numCU = 64 : i32} { // CHECK: rock.conv_bwd_weight - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm // GRID-SAME: gridSize = 3 rock.conv_bwd_weight(%filter, %input, %output) features = none { @@ -259,10 +259,10 @@ func.func @rock_conv_7x7_tuning(%arg0: memref<1x64x3x7x7xf32>, %arg1: memref<256 // GRID-LABEL: @rock_conv_7x7 func.func @rock_conv_7x7(%arg0: memref<1x64x3x7x7xf32>, %arg1: memref<256x1x3x230x230xf32>, %arg2: memref<256x1x64x112x112xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx908"} { // CHECK: rock.conv - // CHECK-SAME: derivedBlockSize = 64 - // CHECK-SAME: params = #rock.accel_gemm_params + // CHECK-SAME: derivedBlockSize = 256 + // CHECK-SAME: params = #rock.accel_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 100352 + // GRID-SAME: gridSize = 12544 rock.conv(%arg0, %arg1, %arg2) features = mfma|dot|atomic_add|atomic_add_f16 { dilations = [1 : index, 1 : index], filter_layout = ["g", "k", "c", "0", "1"], @@ -279,7 +279,7 @@ func.func @rock_conv_7x7(%arg0: memref<1x64x3x7x7xf32>, %arg1: memref<256x1x3x23 func.func @rock_conv_bwd_weight_7x7(%arg0: memref<1x64x3x7x7xf32>, %arg1: memref<256x1x3x230x230xf32>, %arg2: memref<256x1x64x112x112xf32>) attributes {kernel = 0 : i32, arch = "amdgcn-amd-amdhsa:gfx908", numCU = 120 : i32} { // CHECK: rock.conv_bwd_weight // CHECK-SAME: derivedBlockSize = 256 - // CHECK-SAME: params = #rock.accel_gemm_params + // CHECK-SAME: params = #rock.accel_gemm_params // GRID: rock.gridwise_gemm // GRID-SAME: gridSize = 10 rock.conv_bwd_weight(%arg0, %arg1, %arg2) features = mfma|dot|atomic_add|atomic_add_f16 { @@ -319,10 +319,10 @@ func.func @rock_conv_bwd_data_7x7_tuning(%arg0: memref<1x64x3x7x7xf32>, %arg1: m // GRID-LABEL: @rock_conv_bwd_data_7x7 func.func @rock_conv_bwd_data_7x7(%arg0: memref<1x64x3x7x7xf32>, %arg1: memref<256x1x3x230x230xf32>, %arg2: memref<256x1x64x112x112xf32>) attributes {kernel = 1 : i32, arch = "amdgcn-amd-amdhsa:gfx908"} { // CHECK: rock.conv_bwd_data - // CHECK-SAME: derivedBlockSize = 64 - // CHECK-SAME: params = #rock.accel_gemm_params + // CHECK-SAME: derivedBlockSize = 256 + // CHECK-SAME: params = #rock.accel_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 211600 + // GRID-SAME: gridSize = 52900 rock.conv_bwd_data(%arg0, %arg1, %arg2) features = mfma|dot|atomic_add|atomic_add_f16 { dilations = [1 : index, 1 : index], filter_layout = ["g", "k", "c", "0", "1"], @@ -340,9 +340,9 @@ func.func @rock_conv_bwd_data_7x7(%arg0: memref<1x64x3x7x7xf32>, %arg1: memref<2 // GRID-LABEL: @rock_gemm_from_conv func.func @rock_gemm_from_conv(%a : memref<1x72x128xf32>, %b : memref<1x72x115200xf32>, %c : memref<1x128x115200xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906", numCU = 64 : i32} { // CHECK: rock.gemm - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 900 + // GRID-SAME: gridSize = 1800 rock.gemm %c = tr %a * %b features = none storeMethod = set : memref<1x128x115200xf32> = memref<1x72x128xf32> * memref<1x72x115200xf32> return @@ -352,10 +352,10 @@ func.func @rock_gemm_from_conv(%a : memref<1x72x128xf32>, %b : memref<1x72x11520 // GRID-LABEL: func.func @rock_gemm_from_i8_conv func.func @rock_gemm_from_i8_conv(%a : memref<1x72x128xi8>, %b : memref<1x72x115200xi8>, %c : memref<1x128x115200xi32>) attributes {arch = "amdgcn-amd-amdhsa:gfx908", numCU = 120 : i32} { // CHECK: rock.gemm - // CHECK-SAME: derivedBlockSize = 256 - // CHECK-SAME: params = #rock.accel_gemm_params + // CHECK-SAME: derivedBlockSize = 512 + // CHECK-SAME: params = #rock.accel_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 7200 + // GRID-SAME: gridSize = 1800 rock.gemm %c = tr %a * %b features = mfma|dot|atomic_add|atomic_add_f16 storeMethod = set : memref<1x128x115200xi32> = memref<1x72x128xi8> * memref<1x72x115200xi8> return @@ -365,10 +365,10 @@ func.func @rock_gemm_from_i8_conv(%a : memref<1x72x128xi8>, %b : memref<1x72x115 // GRID-LABEL: func.func @rock_gemm_from_i8_conv_schedule_v2 func.func @rock_gemm_from_i8_conv_schedule_v2(%a : memref<1x72x128xi8>, %b : memref<1x72x115200xi8>, %c : memref<1x128x115200xi32>) attributes {schedule_version = #rock.schedule_version<2>, arch = "amdgcn-amd-amdhsa:gfx908", numCU = 120 : i32} { // CHECK: rock.gemm - // CHECK-SAME: derivedBlockSize = 256 - // CHECK-SAME: params = #rock.accel_gemm_params + // CHECK-SAME: derivedBlockSize = 512 + // CHECK-SAME: params = #rock.accel_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 7200 + // GRID-SAME: gridSize = 1800 rock.gemm %c = tr %a * %b features = mfma|dot|atomic_add|atomic_add_f16 storeMethod = set : memref<1x128x115200xi32> = memref<1x72x128xi8> * memref<1x72x115200xi8> return @@ -381,10 +381,10 @@ func.func @rock_gemm_from_i8_conv_schedule_v2(%a : memref<1x72x128xi8>, %b : mem // GRID-LABEL: func.func @rock_gemm_from_i8_conv_gfx942 func.func @rock_gemm_from_i8_conv_gfx942(%a : memref<1x72x128xi8>, %b : memref<1x72x115200xi8>, %c : memref<1x128x115200xi32>) attributes {arch = "amdgcn-amd-amdhsa:gfx942", numCU = 120 : i32} { // CHECK: rock.gemm - // CHECK-SAME: derivedBlockSize = 256 - // CHECK-SAME: params = #rock.accel_gemm_params + // CHECK-SAME: derivedBlockSize = 512 + // CHECK-SAME: params = #rock.accel_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 14400 + // GRID-SAME: gridSize = 1800 rock.gemm %c = tr %a * %b features = mfma|dot|atomic_add|atomic_add_f16 storeMethod = set : memref<1x128x115200xi32> = memref<1x72x128xi8> * memref<1x72x115200xi8> return diff --git a/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-noTransB/broadcasted-k-e2e.mlir b/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-noTransB/broadcasted-k-e2e.mlir index 92be2f1994b5..8ba76962c546 100644 --- a/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-noTransB/broadcasted-k-e2e.mlir +++ b/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-noTransB/broadcasted-k-e2e.mlir @@ -7,7 +7,7 @@ // EMITKEY: -t f16 -out_datatype f16 -transA false -transB false -g 2 -m 4096 -n 640 -k 320 // VECTORIZATION: aVectorDim: GemmDimension::MorN -// VECTORIZATION-NEXT: aVectorLen: 4 +// VECTORIZATION-NEXT: aVectorLen: 2 // VECTORIZATION: bVectorDim: GemmDimension::MorN // VECTORIZATION-NEXT: bVectorLen: 2 diff --git a/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-transB/broadcasted-k-e2e.mlir b/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-transB/broadcasted-k-e2e.mlir index 853c3779e9af..778dbac1e10a 100644 --- a/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-transB/broadcasted-k-e2e.mlir +++ b/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-transB/broadcasted-k-e2e.mlir @@ -7,7 +7,7 @@ // EMITKEY: -t f16 -out_datatype f16 -transA false -transB true -g 2 -m 4096 -n 640 -k 320 // VECTORIZATION: aVectorDim: GemmDimension::MorN -// VECTORIZATION-NEXT: aVectorLen: 4 +// VECTORIZATION-NEXT: aVectorLen: 2 // VECTORIZATION: bVectorDim: GemmDimension::K // VECTORIZATION-NEXT: bVectorLen: 8 diff --git a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/gemm-k-e2e.mlir b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/gemm-k-e2e.mlir index 1c1f9e26dc68..6164549e560a 100644 --- a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/gemm-k-e2e.mlir +++ b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/gemm-k-e2e.mlir @@ -6,7 +6,7 @@ // EMITKEY: -t f16 -out_datatype f16 -transA true -transB false -g 2 -m 4096 -n 640 -k 320 // VECTORIZATION: aVectorDim: GemmDimension::MorN -// VECTORIZATION-NEXT: aVectorLen: 4 +// VECTORIZATION-NEXT: aVectorLen: 2 // VECTORIZATION: bVectorDim: GemmDimension::MorN // VECTORIZATION-NEXT: bVectorLen: 2 diff --git a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/sliced-k-e2e.mlir b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/sliced-k-e2e.mlir index 9a89a23dfe17..b35867a25f25 100644 --- a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/sliced-k-e2e.mlir +++ b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/sliced-k-e2e.mlir @@ -7,7 +7,7 @@ // EMITKEY: -t f16 -out_datatype f16 -transA true -transB false -g 2 -m 4096 -n 640 -k 320 // VECTORIZATION: aVectorDim: GemmDimension::MorN -// VECTORIZATION-NEXT: aVectorLen: 4 +// VECTORIZATION-NEXT: aVectorLen: 2 // VECTORIZATION: bVectorDim: GemmDimension::MorN // VECTORIZATION-NEXT: bVectorLen: 2 diff --git a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/unitdim-m-e2e.mlir b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/unitdim-m-e2e.mlir index 2eb79a3eab78..a22cf6f17803 100644 --- a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/unitdim-m-e2e.mlir +++ b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/unitdim-m-e2e.mlir @@ -9,7 +9,7 @@ // VECTORIZATION: aVectorDim: GemmDimension::K // VECTORIZATION-NEXT: aVectorLen: 8 // VECTORIZATION: bVectorDim: GemmDimension::MorN -// VECTORIZATION-NEXT: bVectorLen: 4 +// VECTORIZATION-NEXT: bVectorLen: 8 module { func.func @test(%arg0: !migraphx.shaped<2x1x320xf16, 320x1x1>, %arg1: !migraphx.shaped<2x640x320xf16, 204800x1x640>, %arg2: !migraphx.shaped<2x64x10xf16, 0x10x1>) -> !migraphx.shaped<2x64x10xf16, 640x10x1> { diff --git a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/gemm-k-e2e.mlir b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/gemm-k-e2e.mlir index 5753af9c6330..62dcc2df3117 100644 --- a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/gemm-k-e2e.mlir +++ b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/gemm-k-e2e.mlir @@ -7,7 +7,7 @@ // EMITKEY: -t f16 -out_datatype f16 -transA true -transB true -g 2 -m 4096 -n 640 -k 320 // VECTORIZATION: aVectorDim: GemmDimension::MorN -// VECTORIZATION-NEXT: aVectorLen: 4 +// VECTORIZATION-NEXT: aVectorLen: 2 // VECTORIZATION: bVectorDim: GemmDimension::K // VECTORIZATION-NEXT: bVectorLen: 8 diff --git a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/sliced-k-e2e.mlir b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/sliced-k-e2e.mlir index b19e6a1ec7df..3f95dfc8caae 100644 --- a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/sliced-k-e2e.mlir +++ b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/sliced-k-e2e.mlir @@ -7,7 +7,7 @@ // EMITKEY: -t f16 -out_datatype f16 -transA true -transB true -g 2 -m 4096 -n 640 -k 320 // VECTORIZATION: aVectorDim: GemmDimension::MorN -// VECTORIZATION-NEXT: aVectorLen: 4 +// VECTORIZATION-NEXT: aVectorLen: 2 // VECTORIZATION: bVectorDim: GemmDimension::K // VECTORIZATION-NEXT: bVectorLen: 8