From 828ebb0854370189c4474f9ca4e01b705b6e005b Mon Sep 17 00:00:00 2001 From: Ashay Rane <253344819+raneashay@users.noreply.github.com> Date: Fri, 27 Feb 2026 08:51:11 -0600 Subject: [PATCH] Add SVE, SVE2, SVEBITPERM, and FP16 feature detection on Windows/ARM64 This patch adds support for SVE, SVE2, SVEBITPERM, FPHP, and ASIMDHP on supported ARM64 processors when running on Windows. Since the MSVC compiler does not support inline assembly for ARM64 processors, this patch introduces a separate file to be able to read VL using the rdvl assembly instruction. Windows does not expose a mechanism for writing VL, so this patch makes `set_and_get_current_sve_vector_length()` simply return the existing VL. This patch introduces a new test (TestSVEFeatureDetection.java) that validates the SVE level and VL determined by the CPU feature detection code, and this patch modifies two existing tests to disassociate support for SVE/SVE2 from support for FPHP and ASIMDHP. Specifically, in TestFloat16VectorOperations.java, SVE alone is insufficient to expect half-precision vector operations; instead FPHP and ASIMDHP support (which is already exercised by the test case) suffices. Along similar lines, in TestReductions.java, we should expect to see non-zero vector operations when SVE is available and we should fail on vector operations when SVE is unavailable. Finally, this patch updates TestFloat16ScalarOperations.java to check for constant-folding of FMA operations only on non-Windows platforms. We do this because `FmaDNode::Value()`, `FmaFNode::Value()`, as well as `FMAHFNode::Value()` fold FMA nodes only when `__STDC_IEC_559__` is defined, which is not the case on Windows for both GCC as well as MSVC. Perhaps we had not discovered this discrepancy until now because FMA support for Windows (on ARM64) was disabled until this patch, so the tests that were predicated on FPHP/ASIMDHP support never ran on Windows. 
Of course, this doesn't explain why we never caught this problem on Windows/x86 machines that support FMAs, but that could be because processors that support `avx512_fp16` are new and we haven't run CI on the machines. --- .../sve_helper_windows_aarch64.S | 37 ++++ .../vm_version_windows_aarch64.cpp | 54 ++++- .../c2/aarch64/TestSVEFeatureDetection.java | 189 ++++++++++++++++++ .../irTests/TestFloat16ScalarOperations.java | 16 +- .../loopopts/superword/TestReductions.java | 108 +++++----- .../TestFloat16VectorOperations.java | 24 +-- 6 files changed, 353 insertions(+), 75 deletions(-) create mode 100644 src/hotspot/os_cpu/windows_aarch64/sve_helper_windows_aarch64.S create mode 100644 test/hotspot/jtreg/compiler/c2/aarch64/TestSVEFeatureDetection.java diff --git a/src/hotspot/os_cpu/windows_aarch64/sve_helper_windows_aarch64.S b/src/hotspot/os_cpu/windows_aarch64/sve_helper_windows_aarch64.S new file mode 100644 index 0000000000000..7aa48662dc84d --- /dev/null +++ b/src/hotspot/os_cpu/windows_aarch64/sve_helper_windows_aarch64.S @@ -0,0 +1,37 @@ +; Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved. +; DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +; +; This code is free software; you can redistribute it and/or modify it +; under the terms of the GNU General Public License version 2 only, as +; published by the Free Software Foundation. +; +; This code is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; version 2 for more details (a copy is included in the LICENSE file that +; accompanied this code). +; +; You should have received a copy of the GNU General Public License version +; 2 along with this work; if not, write to the Free Software Foundation, +; Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+; +; Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +; or visit www.oracle.com if you need additional information or have any +; questions. +; + + ; int get_sve_vector_length_impl(void); + ; + ; Returns the current SVE vector length in bytes. + ; Uses the RDVL instruction which returns imm * VL in bytes. + ; With imm=1 this gives the SVE vector length directly. + + ALIGN 4 + EXPORT get_sve_vector_length_impl + AREA sve_helper_text, CODE, READONLY + +get_sve_vector_length_impl + rdvl x0, 1 + ret + + END diff --git a/src/hotspot/os_cpu/windows_aarch64/vm_version_windows_aarch64.cpp b/src/hotspot/os_cpu/windows_aarch64/vm_version_windows_aarch64.cpp index 93beb549366be..dad262a9321da 100644 --- a/src/hotspot/os_cpu/windows_aarch64/vm_version_windows_aarch64.cpp +++ b/src/hotspot/os_cpu/windows_aarch64/vm_version_windows_aarch64.cpp @@ -26,16 +26,42 @@ #include "runtime/os.hpp" #include "runtime/vm_version.hpp" +// Since PF_ARM_SVE_INSTRUCTIONS_AVAILABLE and related constants were added in +// Windows 11 (version 24H2) and in Windows Server 2025, we define them here for +// compatibility with older SDK versions. +#ifndef PF_ARM_SVE_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_INSTRUCTIONS_AVAILABLE 46 +#endif + +#ifndef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE 47 +#endif + +#ifndef PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE 51 +#endif + +#ifndef PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE +#define PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE 67 +#endif + +// Assembly helper implemented in sve_helper_windows_aarch64.S. Executes the +// RDVL instruction to return the vector length in bytes. 
+extern "C" int get_sve_vector_length_impl(void); + int VM_Version::get_current_sve_vector_length() { assert(VM_Version::supports_sve(), "should not call this"); - ShouldNotReachHere(); - return 0; + return get_sve_vector_length_impl(); } int VM_Version::set_and_get_current_sve_vector_length(int length) { assert(VM_Version::supports_sve(), "should not call this"); - ShouldNotReachHere(); - return 0; + + // Unlike Linux, Windows does not present a way to modify the VL (the + // rationale is that the OS expects the application to use the maximum vector + // length supported by the hardware), so we simply return the current VL. The + // caller (`VM_Version::initialize()`) will print a warning and move on. + return get_sve_vector_length_impl(); } void VM_Version::get_os_cpu_info() { @@ -43,15 +69,33 @@ void VM_Version::get_os_cpu_info() { if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) { set_feature(CPU_CRC32); } + if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) { set_feature(CPU_AES); set_feature(CPU_SHA1); set_feature(CPU_SHA2); } + if (IsProcessorFeaturePresent(PF_ARM_VFP_32_REGISTERS_AVAILABLE)) { set_feature(CPU_ASIMD); } - // No check for CPU_PMULL, CPU_SVE, CPU_SVE2 + + if (IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)) { + set_feature(CPU_SVE); + } + + if (IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)) { + set_feature(CPU_SVE2); + } + + if (IsProcessorFeaturePresent(PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE)) { + set_feature(CPU_SVEBITPERM); + } + + if (IsProcessorFeaturePresent(PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE)) { + set_feature(CPU_FPHP); + set_feature(CPU_ASIMDHP); + } __int64 dczid_el0 = _ReadStatusReg(0x5807 /* ARM64_DCZID_EL0 */); diff --git a/test/hotspot/jtreg/compiler/c2/aarch64/TestSVEFeatureDetection.java b/test/hotspot/jtreg/compiler/c2/aarch64/TestSVEFeatureDetection.java new file mode 100644 index 0000000000000..a9512f78c1845 --- /dev/null +++ 
b/test/hotspot/jtreg/compiler/c2/aarch64/TestSVEFeatureDetection.java @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @summary Verify SVE/SVE2 feature detection for both Windows and Linux. + * + * @requires os.arch == "aarch64" & vm.compiler2.enabled + * @library /test/lib / + * @build jdk.test.whitebox.WhiteBox + * @run driver jdk.test.lib.helpers.ClassFileInstaller + * jdk.test.whitebox.WhiteBox + * + * @run main/othervm -Xbootclasspath/a:. 
-XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI compiler.c2.aarch64.TestSVEFeatureDetection + */ + +package compiler.c2.aarch64; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import jdk.test.lib.Asserts; +import jdk.test.lib.process.ProcessTools; +import jdk.test.lib.process.OutputAnalyzer; +import jdk.test.whitebox.WhiteBox; + +public class TestSVEFeatureDetection { + private static final WhiteBox WB = WhiteBox.getWhiteBox(); + private static final String KEY_USE_SVE = "UseSVE="; + private static final String KEY_MAX_VECTOR = "MaxVectorSize="; + private static final String KEY_HAS_SVE = "has_sve="; + private static final String KEY_HAS_SVE2 = "has_sve2="; + + public static void main(String[] args) throws Exception { + if (args.length > 0 && args[0].equals("flagCheck")) { + printFlags(); + } else { + runDriver(); + } + } + + private static void printFlags() { + int sveLevel = WB.getUintVMFlag("UseSVE").intValue(); + long maxVectorSize = WB.getIntxVMFlag("MaxVectorSize"); + List features = Arrays.asList(WB.getCPUFeatures().split(", ")); + boolean hasSve = features.contains("sve"); + boolean hasSve2 = features.contains("sve2"); + + System.out.println(KEY_USE_SVE + sveLevel); + System.out.println(KEY_MAX_VECTOR + maxVectorSize); + System.out.println(KEY_HAS_SVE + hasSve); + System.out.println(KEY_HAS_SVE2 + hasSve2); + } + + private static void runDriver() throws Exception { + int sveLevel = WB.getUintVMFlag("UseSVE").intValue(); + long maxVectorSize = WB.getIntxVMFlag("MaxVectorSize"); + List features = Arrays.asList(WB.getCPUFeatures().split(", ")); + boolean hasSve = features.contains("sve"); + boolean hasSve2 = features.contains("sve2"); + + // If SVE is not present, just verify a consistent disabled state. 
+ if (!hasSve) { + Asserts.assertEquals(sveLevel, 0, + "UseSVE must be 0 when hardware lacks SVE"); + Asserts.assertFalse(hasSve2, + "sve2 must be absent when sve is absent"); + return; + } + + Asserts.assertTrue(sveLevel > 0, + "UseSVE should be auto-set to > 0 when SVE hardware is present"); + Asserts.assertTrue(maxVectorSize >= 16, + "MaxVectorSize must be >= 16 for SVE, got " + maxVectorSize); + Asserts.assertTrue(Long.bitCount(maxVectorSize) == 1, + "MaxVectorSize must be a power of two, got " + maxVectorSize); + Asserts.assertTrue(maxVectorSize % 16 == 0, + "MaxVectorSize must be a multiple of 16, got " + maxVectorSize); + Asserts.assertTrue(maxVectorSize <= 256, + "MaxVectorSize must be <= 256 (2048 bits), got " + maxVectorSize); + + if (hasSve2) { + Asserts.assertEquals(sveLevel, 2, + "UseSVE should be 2 when hardware supports SVE2"); + } else { + Asserts.assertEquals(sveLevel, 1, + "UseSVE should be 1 when hardware supports SVE but not SVE2"); + } + + OutputAnalyzer out = spawnFlagCheck("-XX:UseSVE=0"); + out.shouldHaveExitValue(0); + out.shouldContain(KEY_USE_SVE + "0"); + out.shouldContain(KEY_HAS_SVE + "false"); + out.shouldContain(KEY_HAS_SVE2 + "false"); + + out = spawnFlagCheck("-XX:UseSVE=1", "-XX:MaxVectorSize=512"); + out.shouldHaveExitValue(0); + out.shouldContain("warning"); + + boolean isWindows = System.getProperty("os.name").toLowerCase().contains("windows"); + out = spawnFlagCheck("-XX:UseSVE=1", "-XX:MaxVectorSize=16"); + out.shouldHaveExitValue(0); + if (isWindows && maxVectorSize > 16) { + out.shouldContain("warning"); + out.shouldContain(KEY_MAX_VECTOR + maxVectorSize); + } else { + out.shouldContain(KEY_MAX_VECTOR + "16"); + } + + if (hasSve2) { + out = spawnFlagCheck("-XX:UseSVE=2"); + out.shouldHaveExitValue(0); + out.shouldContain(KEY_USE_SVE + "2"); + out.shouldContain(KEY_HAS_SVE + "true"); + out.shouldContain(KEY_HAS_SVE2 + "true"); + + out = spawnFlagCheck("-XX:UseSVE=1"); + out.shouldHaveExitValue(0); + 
out.shouldContain(KEY_USE_SVE + "1"); + out.shouldContain(KEY_HAS_SVE + "true"); + out.shouldContain(KEY_HAS_SVE2 + "false"); + } else { + out = spawnFlagCheck("-XX:UseSVE=2"); + out.shouldHaveExitValue(0); + out.shouldContain("SVE2 specified, but not supported on current CPU"); + out.shouldContain(KEY_USE_SVE + "1"); + out.shouldContain(KEY_HAS_SVE + "true"); + out.shouldContain(KEY_HAS_SVE2 + "false"); + } + + out = spawnFlagCheck("-XX:UseSVE=1"); + out.shouldHaveExitValue(0); + out.shouldContain(KEY_USE_SVE + "1"); + out.shouldContain(KEY_HAS_SVE + "true"); + out.shouldMatch("MaxVectorSize=\\d+"); + + if (maxVectorSize >= 32) { + out = spawnFlagCheck("-XX:UseSVE=1", "-XX:MaxVectorSize=32"); + out.shouldHaveExitValue(0); + if (isWindows && maxVectorSize > 32) { + out.shouldContain("warning"); + out.shouldContain(KEY_MAX_VECTOR + maxVectorSize); + } else { + out.shouldContain(KEY_MAX_VECTOR + "32"); + } + } + } + + private static OutputAnalyzer spawnFlagCheck(String... extraFlags) + throws Exception { + List args = new ArrayList<>(); + args.add("-Xbootclasspath/a:."); + args.add("-XX:+UnlockDiagnosticVMOptions"); + args.add("-XX:+WhiteBoxAPI"); + for (String f : extraFlags) { + args.add(f); + } + args.add(TestSVEFeatureDetection.class.getName()); + args.add("flagCheck"); + + ProcessBuilder pb = ProcessTools.createLimitedTestJavaProcessBuilder( + args.toArray(new String[0])); + OutputAnalyzer output = new OutputAnalyzer(pb.start()); + output.reportDiagnosticSummary(); + return output; + } +} diff --git a/test/hotspot/jtreg/compiler/c2/irTests/TestFloat16ScalarOperations.java b/test/hotspot/jtreg/compiler/c2/irTests/TestFloat16ScalarOperations.java index 445fef5e55a58..0b9c4fb02c903 100644 --- a/test/hotspot/jtreg/compiler/c2/irTests/TestFloat16ScalarOperations.java +++ b/test/hotspot/jtreg/compiler/c2/irTests/TestFloat16ScalarOperations.java @@ -714,9 +714,13 @@ public void testSqrtConstantFolding() { @Test @IR(counts = {IRNode.FMA_HF, " 0 ", 
IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}, + // On Windows, both GCC and MSVC don't set __STDC_IEC_559__, so FMAs on constants are not folded. + applyIfPlatform = {"windows", "false"}) @IR(counts = {IRNode.FMA_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "}, - applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) + applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}, + // On Windows, both GCC and MSVC don't set __STDC_IEC_559__, so FMAs on constants are not folded. + applyIfPlatform = {"windows", "false"}) @Warmup(10000) public void testFMAConstantFolding() { // If any argument is NaN, the result is NaN. @@ -752,9 +756,13 @@ public void testFMAConstantFolding() { @Test @IR(failOn = {IRNode.ADD_HF, IRNode.SUB_HF, IRNode.MUL_HF, IRNode.DIV_HF, IRNode.SQRT_HF, IRNode.FMA_HF}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}, + // On Windows, both GCC and MSVC don't set __STDC_IEC_559__, so FMAs on constants are not folded. + applyIfPlatform = {"windows", "false"}) @IR(failOn = {IRNode.ADD_HF, IRNode.SUB_HF, IRNode.MUL_HF, IRNode.DIV_HF, IRNode.SQRT_HF, IRNode.FMA_HF}, - applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) + applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}, + // On Windows, both GCC and MSVC don't set __STDC_IEC_559__, so FMAs on constants are not folded. 
+ applyIfPlatform = {"windows", "false"}) @Warmup(10000) public void testRounding1() { dst[0] = float16ToRawShortBits(add(RANDOM1, RANDOM2)); diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestReductions.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestReductions.java index 5c085e6a3a343..fa4d2a1beec01 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/TestReductions.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestReductions.java @@ -1768,7 +1768,7 @@ private static long longAddSimple() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.MUL_REDUCTION_VL, "> 0", IRNode.MUL_VL, "> 0"}, // vector accumulator - applyIfCPUFeature = {"avx512dq", "true"}, + applyIfCPUFeatureOr = {"avx512dq", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, applyIfCPUFeatureAnd = {"avx512dq", "false", "sse4.1", "true"}) @@ -1776,7 +1776,7 @@ private static long longAddSimple() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.MUL_REDUCTION_VL, "> 0", IRNode.MUL_VL, "= 0"}, // Reduction NOT moved out of loop - applyIfCPUFeatureOr = {"asimd", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) // Note: NEON does not support MulVL for auto vectorization. There is // a scalarized implementation, but that is not profitable for @@ -1840,10 +1840,10 @@ private static long longMaxSimple() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.AND_REDUCTION_V, "> 0", IRNode.AND_VL, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While AndReductionV is implemented in NEON (see longAndSimple), MulVL is not. 
// Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -1861,10 +1861,10 @@ private static long longAndDotProduct() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.OR_REDUCTION_V, "> 0", IRNode.OR_VL, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While OrReductionV is implemented in NEON (see longOrSimple), MulVL is not. // Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -1882,10 +1882,10 @@ private static long longOrDotProduct() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.XOR_REDUCTION_V, "> 0", IRNode.XOR_VL, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While MaxReductionV is implemented in NEON (see longXorSimple), MulVL is not. // Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -1903,10 +1903,10 @@ private static long longXorDotProduct() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_REDUCTION_VL, "> 0", IRNode.ADD_VL, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While MaxReductionV is implemented in NEON (see longAddSimple), MulVL is not. 
// Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -1924,13 +1924,13 @@ private static long longAddDotProduct() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.MUL_REDUCTION_VL, "> 0", IRNode.MUL_VL, "> 0"}, - applyIfCPUFeature = {"avx512dq", "true"}, + applyIfCPUFeatureOr = {"avx512dq", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, applyIfCPUFeatureAnd = {"avx512dq", "false", "sse4.1", "true"}) // I think this could vectorize, but currently does not. Filed: JDK-8370673 @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // MulVL is not implemented on NEON, so we also not have the reduction. // Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -1948,13 +1948,13 @@ private static long longMulDotProduct() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.MIN_REDUCTION_V, "> 0", IRNode.MIN_VL, "> 0"}, - applyIfCPUFeature = {"avx512", "true"}, + applyIfCPUFeatureOr = {"avx512", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, applyIfCPUFeatureAnd = {"avx512", "false", "avx2", "true"}) // I think this could vectorize, but currently does not. Filed: JDK-8370671 @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While MaxReductionV is implemented in NEON (see longMinSimple), MulVL is not. 
// Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -1972,13 +1972,13 @@ private static long longMinDotProduct() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.MAX_REDUCTION_V, "> 0", IRNode.MAX_VL, "> 0"}, - applyIfCPUFeature = {"avx512", "true"}, + applyIfCPUFeatureOr = {"avx512", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, applyIfCPUFeatureAnd = {"avx512", "false", "avx2", "true"}) // I think this could vectorize, but currently does not. Filed: JDK-8370671 @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While MaxReductionV is implemented in NEON (see longMaxSimple), MulVL is not. // Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -1997,10 +1997,10 @@ private static long longMaxDotProduct() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.AND_REDUCTION_V, "> 0", IRNode.AND_VL, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While AndReductionV is implemented in NEON (see longAndSimple), MulVL is not. // Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -2018,10 +2018,10 @@ private static long longAndBig() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.OR_REDUCTION_V, "> 0", IRNode.OR_VL, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While OrReductionV is implemented in NEON (see longOrSimple), MulVL is not. 
// Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -2039,10 +2039,10 @@ private static long longOrBig() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.XOR_REDUCTION_V, "> 0", IRNode.XOR_VL, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While MaxReductionV is implemented in NEON (see longXorSimple), MulVL is not. // Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -2060,10 +2060,10 @@ private static long longXorBig() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_REDUCTION_VL, "> 0", IRNode.ADD_VL, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While MaxReductionV is implemented in NEON (see longAddSimple), MulVL is not. // Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -2081,7 +2081,7 @@ private static long longAddBig() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.MUL_REDUCTION_VL, "> 0", IRNode.MUL_VL, "> 0"}, - applyIfCPUFeature = {"avx512dq", "true"}, + applyIfCPUFeatureOr = {"avx512dq", "true", "sve", "true"}, applyIfAnd = {"AutoVectorizationOverrideProfitability", "> 0", "LoopUnrollLimit", ">= 1000"}) @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -2092,7 +2092,7 @@ private static long longAddBig() { // If you can eliminate this exception for LoopUnrollLimit, please remove // the flag completely from the test, also the "addFlags" at the top. 
@IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // MulVL is not implemented on NEON, so we also not have the reduction. // Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -2110,13 +2110,13 @@ private static long longMulBig() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.MIN_REDUCTION_V, "> 0", IRNode.MIN_VL, "> 0"}, - applyIfCPUFeature = {"avx512", "true"}, + applyIfCPUFeatureOr = {"avx512", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, applyIfCPUFeatureAnd = {"avx512", "false", "avx2", "true"}) // I think this could vectorize, but currently does not. Filed: JDK-8370671 @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While MaxReductionV is implemented in NEON (see longMinSimple), MulVL is not. // Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -2134,13 +2134,13 @@ private static long longMinBig() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.MAX_REDUCTION_V, "> 0", IRNode.MAX_VL, "> 0"}, - applyIfCPUFeature = {"avx512", "true"}, + applyIfCPUFeatureOr = {"avx512", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, applyIfCPUFeatureAnd = {"avx512", "false", "avx2", "true"}) // I think this could vectorize, but currently does not. Filed: JDK-8370671 @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While MaxReductionV is implemented in NEON (see longMaxSimple), MulVL is not. 
// Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -2159,10 +2159,10 @@ private static long longMaxBig() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.ADD_REDUCTION_V, "> 0", IRNode.ADD_VF, "= 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "= 2"}) @IR(failOn = IRNode.LOAD_VECTOR_F, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. @IR(failOn = IRNode.LOAD_VECTOR_F, @@ -2183,10 +2183,10 @@ private static float floatAddSimple() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_REDUCTION_VF, "> 0", IRNode.MUL_VF, "= 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "= 2"}) @IR(failOn = IRNode.LOAD_VECTOR_F, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. @IR(failOn = IRNode.LOAD_VECTOR_F, @@ -2242,10 +2242,10 @@ private static float floatMaxSimple() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.ADD_REDUCTION_V, "> 0", IRNode.ADD_VF, "= 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_F, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. 
@IR(failOn = IRNode.LOAD_VECTOR_F, @@ -2263,10 +2263,10 @@ private static float floatAddDotProduct() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_REDUCTION_VF, "> 0", IRNode.MUL_VF, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_F, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. @IR(failOn = IRNode.LOAD_VECTOR_F, @@ -2319,10 +2319,10 @@ private static float floatMaxDotProduct() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.ADD_REDUCTION_V, "> 0", IRNode.ADD_VF, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_F, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. @IR(failOn = IRNode.LOAD_VECTOR_F, @@ -2340,10 +2340,10 @@ private static float floatAddBig() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_REDUCTION_VF, "> 0", IRNode.MUL_VF, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_F, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. 
@IR(failOn = IRNode.LOAD_VECTOR_F, @@ -2396,10 +2396,10 @@ private static float floatMaxBig() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.ADD_REDUCTION_VD, "> 0", IRNode.ADD_VD, "= 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "= 2"}) @IR(failOn = IRNode.LOAD_VECTOR_D, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. @IR(failOn = IRNode.LOAD_VECTOR_D, @@ -2420,10 +2420,10 @@ private static double doubleAddSimple() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_REDUCTION_VD, "> 0", IRNode.MUL_VD, "= 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "= 2"}) @IR(failOn = IRNode.LOAD_VECTOR_D, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. @IR(failOn = IRNode.LOAD_VECTOR_D, @@ -2479,10 +2479,10 @@ private static double doubleMaxSimple() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.ADD_REDUCTION_V, "> 0", IRNode.ADD_VD, "= 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_D, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. 
@IR(failOn = IRNode.LOAD_VECTOR_D, @@ -2500,10 +2500,10 @@ private static double doubleAddDotProduct() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_REDUCTION_VD, "> 0", IRNode.MUL_VD, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_D, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. @IR(failOn = IRNode.LOAD_VECTOR_D, @@ -2556,10 +2556,10 @@ private static double doubleMaxDotProduct() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.ADD_REDUCTION_V, "> 0", IRNode.ADD_VD, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_D, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. @IR(failOn = IRNode.LOAD_VECTOR_D, @@ -2577,10 +2577,10 @@ private static double doubleAddBig() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_REDUCTION_VD, "> 0", IRNode.MUL_VD, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_D, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. 
@IR(failOn = IRNode.LOAD_VECTOR_D, diff --git a/test/hotspot/jtreg/compiler/vectorization/TestFloat16VectorOperations.java b/test/hotspot/jtreg/compiler/vectorization/TestFloat16VectorOperations.java index f3c27c4d278a5..076c496d581f2 100644 --- a/test/hotspot/jtreg/compiler/vectorization/TestFloat16VectorOperations.java +++ b/test/hotspot/jtreg/compiler/vectorization/TestFloat16VectorOperations.java @@ -92,7 +92,7 @@ public TestFloat16VectorOperations() { @Test @Warmup(50) @IR(counts = {IRNode.ADD_VHF, " >0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.ADD_VHF, " >0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorAddFloat16() { @@ -113,7 +113,7 @@ public void checkResultAdd() { @Test @Warmup(50) @IR(counts = {IRNode.SUB_VHF, " >0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.SUB_VHF, " >0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorSubFloat16() { @@ -134,7 +134,7 @@ public void checkResultSub() { @Test @Warmup(50) @IR(counts = {IRNode.MUL_VHF, " >0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.MUL_VHF, " >0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorMulFloat16() { @@ -154,7 +154,7 @@ public void checkResultMul() { @Test @Warmup(50) @IR(counts = {IRNode.DIV_VHF, " >0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.DIV_VHF, " >0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorDivFloat16() { @@ -174,7 +174,7 @@ public void checkResultDiv() 
{ @Test @Warmup(50) @IR(counts = {IRNode.MIN_VHF, " >0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.MIN_VHF, " >0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorMinFloat16() { @@ -194,7 +194,7 @@ public void checkResultMin() { @Test @Warmup(50) @IR(counts = {IRNode.MAX_VHF, " >0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.MAX_VHF, " >0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorMaxFloat16() { @@ -214,7 +214,7 @@ public void checkResultMax() { @Test @Warmup(50) @IR(counts = {IRNode.SQRT_VHF, " >0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.SQRT_VHF, " >0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorSqrtFloat16() { @@ -234,7 +234,7 @@ public void checkResultSqrt() { @Test @Warmup(50) @IR(counts = {IRNode.FMA_VHF, " >0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.FMA_VHF, " >0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorFmaFloat16() { @@ -256,7 +256,7 @@ public void checkResultFma() { @Test @Warmup(50) @IR(counts = {IRNode.FMA_VHF, " >0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.FMA_VHF, " >0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorFmaFloat16ScalarMixedConstants() { @@ -279,7 +279,7 @@ public void checkResultFmaScalarMixedConstants() { @Test @Warmup(50) @IR(counts 
= {IRNode.FMA_VHF, " >0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.FMA_VHF, " >0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorFmaFloat16MixedConstants() { @@ -302,7 +302,7 @@ public void checkResultFmaMixedConstants() { @Test @Warmup(50) @IR(counts = {IRNode.FMA_VHF, " 0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.FMA_VHF, " 0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorFmaFloat16AllConstants() { @@ -329,7 +329,7 @@ public void checkResultFmaAllConstants() { @Test @Warmup(50) @IR(counts = {IRNode.ADD_VHF, " >0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.ADD_VHF, " >0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorAddConstInputFloat16() {