From 15e97642cec6b169f580ddb03c27925b40b3fb58 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Wed, 5 Mar 2025 04:45:35 +0000 Subject: [PATCH 1/2] [msan] Handle Arm NEON pairwise min/max instructions Change the handling of: - llvm.aarch64.neon.fmaxp - llvm.aarch64.neon.fminp - llvm.aarch64.neon.fmaxnmp - llvm.aarch64.neon.fminnmp - llvm.aarch64.neon.smaxp - llvm.aarch64.neon.sminp - llvm.aarch64.neon.umaxp - llvm.aarch64.neon.uminp from the incorrect heuristic handler (maybeHandleSimpleNomemIntrinsic) to handlePairwiseShadowOrIntrinsic. Updates the tests from https://github.com/llvm/llvm-project/pull/129760 --- .../Instrumentation/MemorySanitizer.cpp | 12 ++ .../MemorySanitizer/AArch64/arm64-vmax.ll | 150 +++++++++++++----- 2 files changed, 122 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 52e42932fc751..6f1d1a97867a7 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -4871,6 +4871,18 @@ struct MemorySanitizerVisitor : public InstVisitor { setOrigin(&I, getCleanOrigin()); break; + // TODO: handling max/min similarly to AND/OR may be more precise + // Floating-Point Maximum/Minimum Pairwise + case Intrinsic::aarch64_neon_fmaxp: + case Intrinsic::aarch64_neon_fminp: + // Floating-Point Maximum/Minimum Number Pairwise + case Intrinsic::aarch64_neon_fmaxnmp: + case Intrinsic::aarch64_neon_fminnmp: + // Signed/Unsigned Maximum/Minimum Pairwise + case Intrinsic::aarch64_neon_smaxp: + case Intrinsic::aarch64_neon_sminp: + case Intrinsic::aarch64_neon_umaxp: + case Intrinsic::aarch64_neon_uminp: // Add Pairwise case Intrinsic::aarch64_neon_addp: // Floating-point Add Pairwise diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vmax.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vmax.ll index a46a5b1c747db..e2457c0a51d46 100644 --- 
a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vmax.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vmax.ll @@ -3,7 +3,8 @@ ; ; Forked from llvm/test/CodeGen/AArch64/arm64-vmax.ll ; -; Pairwise instructions which are handled incorrectly by heuristics: +; Pairwise instructions which are explicitly handled by +; handlePairwiseShadowOrIntrinsic: ; - llvm.aarch64.neon.fmaxp (floating-point maximum pairwise) ; - llvm.aarch64.neon.fminp ; - llvm.aarch64.neon.fmaxnmp (floating-point maximum number pairwise) @@ -1017,7 +1018,9 @@ define <8 x i8> @smaxp_8b(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSLD1]], <8 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSLD1]], <8 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> [[TMPVAR1]], <8 x i8> [[TMPVAR2]]) ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMPVAR3]] @@ -1056,7 +1059,9 @@ define <16 x i8> @smaxp_16b(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> [[_MSLD1]], <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> [[_MSLD1]], <16 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP13]], [[TMP14]] ; CHECK-NEXT: 
[[TMPVAR3:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> [[TMPVAR1]], <16 x i8> [[TMPVAR2]]) ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMPVAR3]] @@ -1095,7 +1100,9 @@ define <4 x i16> @smaxp_4h(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> [[TMPVAR1]], <4 x i16> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMPVAR3]] @@ -1134,7 +1141,9 @@ define <8 x i16> @smaxp_8h(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> [[_MSLD1]], <8 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> [[_MSLD1]], <8 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> [[TMPVAR1]], <8 x i16> [[TMPVAR2]]) ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMPVAR3]] @@ -1173,7 +1182,9 @@ define <2 x i32> @smaxp_2s(ptr %A, ptr %B) nounwind #0 { ; 
CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> [[TMPVAR1]], <2 x i32> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMPVAR3]] @@ -1212,7 +1223,9 @@ define <4 x i32> @smaxp_4s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> [[TMPVAR1]], <4 x i32> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMPVAR3]] @@ -1258,7 +1271,9 @@ define <8 x i8> @umaxp_8b(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x 
i8> [[_MSLD]], <8 x i8> [[_MSLD1]], <8 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSLD1]], <8 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> [[TMPVAR1]], <8 x i8> [[TMPVAR2]]) ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMPVAR3]] @@ -1297,7 +1312,9 @@ define <16 x i8> @umaxp_16b(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> [[_MSLD1]], <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> [[_MSLD1]], <16 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> [[TMPVAR1]], <16 x i8> [[TMPVAR2]]) ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMPVAR3]] @@ -1336,7 +1353,9 @@ define <4 x i16> @umaxp_4h(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> [[TMPVAR1]], <4 x 
i16> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMPVAR3]] @@ -1375,7 +1394,9 @@ define <8 x i16> @umaxp_8h(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> [[_MSLD1]], <8 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> [[_MSLD1]], <8 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> [[TMPVAR1]], <8 x i16> [[TMPVAR2]]) ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMPVAR3]] @@ -1414,7 +1435,9 @@ define <2 x i32> @umaxp_2s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> [[TMPVAR1]], <2 x i32> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMPVAR3]] @@ -1453,7 +1476,9 @@ define <4 x i32> @umaxp_4s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = 
inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> [[TMPVAR1]], <4 x i32> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMPVAR3]] @@ -1500,7 +1525,9 @@ define <8 x i8> @sminp_8b(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSLD1]], <8 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSLD1]], <8 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> [[TMPVAR1]], <8 x i8> [[TMPVAR2]]) ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMPVAR3]] @@ -1539,7 +1566,9 @@ define <16 x i8> @sminp_16b(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> [[_MSLD1]], <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i8> 
[[_MSLD]], <16 x i8> [[_MSLD1]], <16 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> [[TMPVAR1]], <16 x i8> [[TMPVAR2]]) ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMPVAR3]] @@ -1578,7 +1607,9 @@ define <4 x i16> @sminp_4h(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> [[TMPVAR1]], <4 x i16> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMPVAR3]] @@ -1617,7 +1648,9 @@ define <8 x i16> @sminp_8h(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> [[_MSLD1]], <8 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> [[_MSLD1]], <8 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> [[TMPVAR1]], <8 x i16> [[TMPVAR2]]) ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 
; CHECK-NEXT: ret <8 x i16> [[TMPVAR3]] @@ -1656,7 +1689,9 @@ define <2 x i32> @sminp_2s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> [[TMPVAR1]], <2 x i32> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMPVAR3]] @@ -1695,7 +1730,9 @@ define <4 x i32> @sminp_4s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> [[TMPVAR1]], <4 x i32> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMPVAR3]] @@ -1741,7 +1778,9 @@ define <8 x i8> @uminp_8b(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 
8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSLD1]], <8 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSLD1]], <8 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> [[TMPVAR1]], <8 x i8> [[TMPVAR2]]) ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMPVAR3]] @@ -1780,7 +1819,9 @@ define <16 x i8> @uminp_16b(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> [[_MSLD1]], <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> [[_MSLD1]], <16 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> [[TMPVAR1]], <16 x i8> [[TMPVAR2]]) ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMPVAR3]] @@ -1819,7 +1860,9 @@ define <4 x i16> @uminp_4h(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> 
[[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> [[TMPVAR1]], <4 x i16> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMPVAR3]] @@ -1858,7 +1901,9 @@ define <8 x i16> @uminp_8h(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> [[_MSLD1]], <8 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> [[_MSLD1]], <8 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> [[TMPVAR1]], <8 x i16> [[TMPVAR2]]) ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMPVAR3]] @@ -1897,7 +1942,9 @@ define <2 x i32> @uminp_2s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> [[TMPVAR1]], <2 x i32> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMPVAR3]] @@ -1936,7 +1983,9 @@ define <4 x i32> 
@uminp_4s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> [[TMPVAR1]], <4 x i32> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMPVAR3]] @@ -2103,7 +2152,9 @@ define <2 x float> @fmaxp_2s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> [[TMPVAR1]], <2 x float> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x float> [[TMPVAR3]] @@ -2142,7 +2193,9 @@ define <4 x float> @fmaxp_4s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], 
[[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> [[TMPVAR1]], <4 x float> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[TMPVAR3]] @@ -2181,7 +2234,9 @@ define <2 x double> @fmaxp_2d(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[_MSLD]], <2 x i64> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[_MSLD]], <2 x i64> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> [[TMPVAR1]], <2 x double> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x double> [[TMPVAR3]] @@ -2345,7 +2400,9 @@ define <2 x float> @fminp_2s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP13]], [[TMP14]] ; 
CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> [[TMPVAR1]], <2 x float> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x float> [[TMPVAR3]] @@ -2384,7 +2441,9 @@ define <4 x float> @fminp_4s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> [[TMPVAR1]], <4 x float> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[TMPVAR3]] @@ -2423,7 +2482,9 @@ define <2 x double> @fminp_2d(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[_MSLD]], <2 x i64> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[_MSLD]], <2 x i64> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> [[TMPVAR1]], <2 x double> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x double> [[TMPVAR3]] @@ -2466,7 +2527,9 @@ define <2 x 
float> @fminnmp_2s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> [[TMPVAR1]], <2 x float> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x float> [[TMPVAR3]] @@ -2505,7 +2568,9 @@ define <4 x float> @fminnmp_4s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> [[TMPVAR1]], <4 x float> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[TMPVAR3]] @@ -2544,7 +2609,9 @@ define <2 x double> @fminnmp_2d(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or 
<2 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[_MSLD]], <2 x i64> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[_MSLD]], <2 x i64> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> [[TMPVAR1]], <2 x double> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x double> [[TMPVAR3]] @@ -2587,7 +2654,9 @@ define <2 x float> @fmaxnmp_2s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> [[TMPVAR1]], <2 x float> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x float> [[TMPVAR3]] @@ -2626,7 +2695,9 @@ define <4 x float> @fmaxnmp_4s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> 
[[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> [[TMPVAR1]], <4 x float> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[TMPVAR3]] @@ -2665,7 +2736,9 @@ define <2 x double> @fmaxnmp_2d(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[_MSLD]], <2 x i64> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[_MSLD]], <2 x i64> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> [[TMPVAR1]], <2 x double> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x double> [[TMPVAR3]] @@ -2681,6 +2754,3 @@ declare <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float>, <4 x float>) n declare <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone attributes #0 = { sanitize_memory } -;. -; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575} -;. 
From ad0b18674ab364c42d7cc0d2b0e275b2c0ef69e5 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Wed, 5 Mar 2025 18:38:44 +0000 Subject: [PATCH 2/2] Add note that maybeHandleSimpleNomemIntrinsic may incorrectly match horizontal/pairwise intrinsics --- llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 6f1d1a97867a7..de7f023e2c528 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -3129,6 +3129,9 @@ struct MemorySanitizerVisitor : public InstVisitor { /// fine). /// /// Caller guarantees that this intrinsic does not access memory. + /// + /// TODO: "horizontal"/"pairwise" intrinsics are often incorrectly matched + /// by this handler. [[maybe_unused]] bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I, unsigned int trailingFlags) {