[clang][AArch64][SVE2p3][SME2p3] Add intrinsics for v9.7a shift operations (#186087)
[clang][AArch64][SVE2p3][SME2p3] Add intrinsics for v9.7a shift operations (#186087)
Conversation
|
@llvm/pr-subscribers-backend-aarch64 Author: None (amilendra)

Changes: Add the following new clang intrinsics based on the ACLE specification ARM-software/acle#428 (Add alpha support for 9.7 data processing intrinsics):
- Multi-vector saturating rounding shift right narrow and interleave instructions
- Multi-vector saturating shift right narrow and interleave instructions
Patch is 70.78 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/186087.diff 10 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index be3cd8a76503b..98838af2030ad 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2221,6 +2221,15 @@ let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
def SVSQRSHRUN_X2 : SInst<"svqrshrun[_n]_{0}[_{d}_x2]", "e2i", "i", MergeNone, "aarch64_sve_sqrshrun_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck1_16>]>;
}
+//
+// Multi-vector saturating rounding shift right narrow and interleave
+//
+let SVETargetGuard = "sve2p3", SMETargetGuard = "sme2p3" in {
+ def SVSQRSHRN_X2_2P3 : SInst<"svqrshrn[_n]_{0}[_{d}_x2]", "h2i", "s", MergeNone, "aarch64_sve_sqrshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
+ def SVUQRSHRN_X2_2P3 : SInst<"svqrshrn[_n]_{0}[_{d}_x2]", "e2i", "Us", MergeNone, "aarch64_sve_uqrshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
+ def SVSQRSHRUN_X2_2P3 : SInst<"svqrshrun[_n]_{0}[_{d}_x2]", "e2i", "s", MergeNone, "aarch64_sve_sqrshrun_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
+}
+
let SVETargetGuard = "sve2p1|sme2p1", SMETargetGuard = "sve2p1|sme2p1" in {
def SVZIPQ1 : SInst<"svzipq1[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_zipq1", [VerifyRuntimeMode], []>;
def SVZIPQ2 : SInst<"svzipq2[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_zipq2", [VerifyRuntimeMode], []>;
@@ -2300,6 +2309,15 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme-f16f16" in {
def SVCVTL_F32_X2 : SInst<"svcvtl_f32[_f16_x2]", "2h", "f", MergeNone, "aarch64_sve_fcvtl_widen_x2", [ IsStreaming],[]>;
}
+//
+// Multi-vector saturating shift right narrow and interleave
+//
+let SVETargetGuard = "sve2p3", SMETargetGuard = "sme2p3" in {
+ def SVSQSHRN_X2 : SInst<"svqshrn[_n]_{0}[_{d}_x2]", "h2i", "is", MergeNone, "aarch64_sve_sqshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
+ def SVUQSHRN_X2 : SInst<"svqshrn[_n]_{0}[_{d}_x2]", "e2i", "UiUs", MergeNone, "aarch64_sve_uqshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
+ def SVSQSHRUN_X2 : SInst<"svqshrun[_n]_{0}[_{d}_x2]", "e2i", "is", MergeNone, "aarch64_sve_sqshrun_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
+}
+
//
// Multi-vector saturating extract narrow
//
diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qrshr.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qrshr.c
new file mode 100644
index 0000000000000..4d7bf51d33913
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qrshr.c
@@ -0,0 +1,148 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
+#define ATTR __arm_streaming
+#else
+#define ATTR
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svqrshrn_n_s8_s16_x2(
+// CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1
+// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16
+// CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16
+// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16
+// CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8)
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z25test_svqrshrn_n_s8_s16_x211svint16x2_tm(
+// CPP-CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CPP-CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1
+// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16
+// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16
+// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16
+// CPP-CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]]
+//
+svint8_t test_svqrshrn_n_s8_s16_x2(svint16x2_t zn, uint64_t imm) ATTR
+{
+ return SVE_ACLE_FUNC(svqrshrn,_n,_s8,_s16_x2,)(zn, 8);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svqrshrn_n_u8_u16_x2(
+// CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1
+// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16
+// CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16
+// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16
+// CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8)
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z25test_svqrshrn_n_u8_u16_x212svuint16x2_tm(
+// CPP-CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CPP-CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1
+// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16
+// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16
+// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16
+// CPP-CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]]
+//
+svuint8_t test_svqrshrn_n_u8_u16_x2(svuint16x2_t zn, uint64_t imm) ATTR
+{
+ return SVE_ACLE_FUNC(svqrshrn,_n,_u8,_u16_x2,)(zn, 8);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svqrshrun_n_u8_s16_x2(
+// CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1
+// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16
+// CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16
+// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16
+// CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrun.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8)
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z26test_svqrshrun_n_u8_s16_x211svint16x2_tm(
+// CPP-CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CPP-CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1
+// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16
+// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16
+// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16
+// CPP-CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrun.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]]
+//
+svuint8_t test_svqrshrun_n_u8_s16_x2(svint16x2_t zn, uint64_t imm) ATTR
+{
+ return SVE_ACLE_FUNC(svqrshrun,_n,_u8,_s16_x2,)(zn, 8);
+}
diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qshr.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qshr.c
new file mode 100644
index 0000000000000..60da63d609880
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qshr.c
@@ -0,0 +1,271 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
+#define ATTR __arm_streaming
+#else
+#define ATTR
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svqshrn_n_s8_s16_x2(
+// CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1
+// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16
+// CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16
+// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16
+// CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8)
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z24test_svqshrn_n_s8_s16_x211svint16x2_tm(
+// CPP-CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CPP-CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1
+// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16
+// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16
+// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16
+// CPP-CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]]
...
[truncated]
|
|
@llvm/pr-subscribers-llvm-ir Author: None (amilendra)

Changes: Add the following new clang intrinsics based on the ACLE specification ARM-software/acle#428 (Add alpha support for 9.7 data processing intrinsics):
- Multi-vector saturating rounding shift right narrow and interleave instructions
- Multi-vector saturating shift right narrow and interleave instructions
Patch is 70.78 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/186087.diff 10 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index be3cd8a76503b..98838af2030ad 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2221,6 +2221,15 @@ let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
def SVSQRSHRUN_X2 : SInst<"svqrshrun[_n]_{0}[_{d}_x2]", "e2i", "i", MergeNone, "aarch64_sve_sqrshrun_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck1_16>]>;
}
+//
+// Multi-vector saturating rounding shift right narrow and interleave
+//
+let SVETargetGuard = "sve2p3", SMETargetGuard = "sme2p3" in {
+ def SVSQRSHRN_X2_2P3 : SInst<"svqrshrn[_n]_{0}[_{d}_x2]", "h2i", "s", MergeNone, "aarch64_sve_sqrshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
+ def SVUQRSHRN_X2_2P3 : SInst<"svqrshrn[_n]_{0}[_{d}_x2]", "e2i", "Us", MergeNone, "aarch64_sve_uqrshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
+ def SVSQRSHRUN_X2_2P3 : SInst<"svqrshrun[_n]_{0}[_{d}_x2]", "e2i", "s", MergeNone, "aarch64_sve_sqrshrun_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
+}
+
let SVETargetGuard = "sve2p1|sme2p1", SMETargetGuard = "sve2p1|sme2p1" in {
def SVZIPQ1 : SInst<"svzipq1[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_zipq1", [VerifyRuntimeMode], []>;
def SVZIPQ2 : SInst<"svzipq2[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_zipq2", [VerifyRuntimeMode], []>;
@@ -2300,6 +2309,15 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme-f16f16" in {
def SVCVTL_F32_X2 : SInst<"svcvtl_f32[_f16_x2]", "2h", "f", MergeNone, "aarch64_sve_fcvtl_widen_x2", [ IsStreaming],[]>;
}
+//
+// Multi-vector saturating shift right narrow and interleave
+//
+let SVETargetGuard = "sve2p3", SMETargetGuard = "sme2p3" in {
+ def SVSQSHRN_X2 : SInst<"svqshrn[_n]_{0}[_{d}_x2]", "h2i", "is", MergeNone, "aarch64_sve_sqshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
+ def SVUQSHRN_X2 : SInst<"svqshrn[_n]_{0}[_{d}_x2]", "e2i", "UiUs", MergeNone, "aarch64_sve_uqshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
+ def SVSQSHRUN_X2 : SInst<"svqshrun[_n]_{0}[_{d}_x2]", "e2i", "is", MergeNone, "aarch64_sve_sqshrun_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
+}
+
//
// Multi-vector saturating extract narrow
//
diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qrshr.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qrshr.c
new file mode 100644
index 0000000000000..4d7bf51d33913
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qrshr.c
@@ -0,0 +1,148 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
+#define ATTR __arm_streaming
+#else
+#define ATTR
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svqrshrn_n_s8_s16_x2(
+// CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1
+// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16
+// CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16
+// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16
+// CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8)
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z25test_svqrshrn_n_s8_s16_x211svint16x2_tm(
+// CPP-CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CPP-CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1
+// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16
+// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16
+// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16
+// CPP-CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]]
+//
+svint8_t test_svqrshrn_n_s8_s16_x2(svint16x2_t zn, uint64_t imm) ATTR
+{
+ return SVE_ACLE_FUNC(svqrshrn,_n,_s8,_s16_x2,)(zn, 8);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svqrshrn_n_u8_u16_x2(
+// CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1
+// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16
+// CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16
+// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16
+// CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8)
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z25test_svqrshrn_n_u8_u16_x212svuint16x2_tm(
+// CPP-CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CPP-CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1
+// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16
+// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16
+// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16
+// CPP-CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]]
+//
+svuint8_t test_svqrshrn_n_u8_u16_x2(svuint16x2_t zn, uint64_t imm) ATTR
+{
+ return SVE_ACLE_FUNC(svqrshrn,_n,_u8,_u16_x2,)(zn, 8);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svqrshrun_n_u8_s16_x2(
+// CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1
+// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16
+// CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16
+// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16
+// CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrun.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8)
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z26test_svqrshrun_n_u8_s16_x211svint16x2_tm(
+// CPP-CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CPP-CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1
+// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16
+// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16
+// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16
+// CPP-CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrun.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]]
+//
+svuint8_t test_svqrshrun_n_u8_s16_x2(svint16x2_t zn, uint64_t imm) ATTR
+{
+ return SVE_ACLE_FUNC(svqrshrun,_n,_u8,_s16_x2,)(zn, 8);
+}
diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qshr.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qshr.c
new file mode 100644
index 0000000000000..60da63d609880
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qshr.c
@@ -0,0 +1,271 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
+#define ATTR __arm_streaming
+#else
+#define ATTR
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svqshrn_n_s8_s16_x2(
+// CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1
+// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16
+// CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16
+// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16
+// CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8)
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z24test_svqshrn_n_s8_s16_x211svint16x2_tm(
+// CPP-CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CPP-CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1
+// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16
+// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16
+// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16
+// CPP-CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8)
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]]
...
[truncated]
|
...st/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve-aes2___sme_AND_sve-aes2_AND_ssve-aes.c
Outdated
Show resolved
Hide resolved
clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve2p3___sme_AND_sme2p3.c
Outdated
Show resolved
Hide resolved
| @@ -2221,6 +2221,15 @@ let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in { | |||
| def SVSQRSHRUN_X2 : SInst<"svqrshrun[_n]_{0}[_{d}_x2]", "e2i", "i", MergeNone, "aarch64_sve_sqrshrun_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck1_16>]>; | |||
| } | |||
There was a problem hiding this comment.
These are now replicated by the lines just below?
There was a problem hiding this comment.
The guards for these intrinsics are different. SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = "sve2p3|sme2p3". So I think they cannot be merged. Let me know if otherwise.
f1a68c9 to
6646f2e
Compare
|
Could you add an LLVM IR test for the patterns of the intrinsics you are adding? |
6646f2e to
495a066
Compare
CarolineConcatto
left a comment
There was a problem hiding this comment.
It looks good to me, just need to update the test in clang to see if they fail when lowering to assembly. But the implementation is fine
| // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s | ||
| // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sme2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s | ||
| // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s | ||
|
|
There was a problem hiding this comment.
Can we add
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -S -O3 -o /dev/null %s
| // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sme2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s | ||
| // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s | ||
|
|
||
| // REQUIRES: aarch64-registered-target |
There was a problem hiding this comment.
One more test line and we should be good here:
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -S -O3 -o /dev/null %s
[clang][AArch64][SVE2p3][SME2p3] Add intrinsics for v9.7a shift operations — Add the following new clang intrinsics based on the ACLE specification ARM-software/acle#428 (Add alpha support for 9.7 data processing intrinsics) Multi-vector saturating rounding shift right narrow and interleave instructions - SQRSHRN - svint8_t svqrshrn_s8(svint16x2_t, uint64_t) / svint8_t svqrshrn_n_s8_s16_x2(svint16x2_t, uint64_t) - UQRSHRN - svuint8_t svqrshrn_u8(svuint16x2_t, uint64_t) / svuint8_t svqrshrn_n_u8_u16_x2(svuint16x2_t, uint64_t) - SQRSHRUN - svuint8_t svqrshrun_u8(svint16x2_t, uint64_t) / svuint8_t svqrshrun_n_u8_s16_x2(svint16x2_t, uint64_t) Multi-vector saturating shift right narrow and interleave - SQSHRN - svint8_t svqshrn_s8(svint16x2_t, uint64_t) / svint8_t svqshrn_n_s8_s16_x2(svint16x2_t, uint64_t) - svint16_t svqshrn_s16(svint32x2_t, uint64_t) / svint16_t svqshrn_n_s16_s32_x2(svint32x2_t, uint64_t) - UQSHRN - svuint8_t svqshrn_u8(svuint16x2_t, uint64_t) / svuint8_t svqshrn_n_u8_u16_x2(svuint16x2_t, uint64_t) - svuint16_t svqshrn_u16(svuint32x2_t, uint64_t) / svuint16_t svqshrn_n_u16_u32_x2(svuint32x2_t, uint64_t) - SQSHRUN - svuint8_t svqshrun_u8(svint16x2_t, uint64_t) / svuint8_t svqshrun_n_u8_s16_x2(svint16x2_t, uint64_t) - svuint16_t svqshrun_u16(svint32x2_t, uint64_t) / svuint16_t svqshrun_n_u16_s32_x2(svint32x2_t, uint64_t)
495a066 to
a0b4a7d
Compare
kmclaughlin-arm
left a comment
There was a problem hiding this comment.
I've left a few minor comments on the tests, but otherwise this LGTM
| @@ -0,0 +1,144 @@ | |||
| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 | |||
| // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s | |||
There was a problem hiding this comment.
I think -target-feature +sve2 can be removed from all of these RUN lines.
| @@ -0,0 +1,255 @@ | |||
| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 | |||
| // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s | |||
| // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK | |||
There was a problem hiding this comment.
Same here, I think -target-feature +sve2 is not needed.
| ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -enable-subreg-liveness -force-streaming -verify-machineinstrs < %s | FileCheck %s | ||
| ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme,+sve2p3 -enable-subreg-liveness -force-streaming -verify-machineinstrs < %s | FileCheck %s |
There was a problem hiding this comment.
Subreg liveness is enabled by default for streaming functions, so the flag can be removed from the RUN lines with -force-streaming.
| ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -enable-subreg-liveness -force-streaming -verify-machineinstrs < %s | FileCheck %s | ||
| ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme,+sve2p3 -enable-subreg-liveness -force-streaming -verify-machineinstrs < %s | FileCheck %s |
There was a problem hiding this comment.
As above, -enable-subreg-liveness can be removed here :)
Add the following new clang intrinsics based on the ACLE specification ARM-software/acle#428 (Add alpha support for 9.7 data processing intrinsics)
Multi-vector saturating rounding shift right narrow and interleave instructions
SQRSHRN
UQRSHRN
SQRSHRUN
Multi-vector saturating shift right narrow and interleave
SQSHRN
UQSHRN
SQSHRUN