diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-umaxv.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-umaxv.ll new file mode 100644 index 0000000000000..a90ed74557727 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-umaxv.ll @@ -0,0 +1,342 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=msan -S | FileCheck %s +; +; Forked from llvm/test/CodeGen/AArch64/arm64-umaxv.ll +; +; Handled suboptimally (visitInstruction): +; - llvm.aarch64.neon.umaxv.i32.v16i8 +; - llvm.aarch64.neon.umaxv.i32.v2i32 +; - llvm.aarch64.neon.umaxv.i32.v4i16 +; - llvm.aarch64.neon.umaxv.i32.v4i32 +; - llvm.aarch64.neon.umaxv.i32.v8i16 +; - llvm.aarch64.neon.umaxv.i32.v8i8 + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-android9001" + +define i32 @vmax_u8x8(<8 x i8> %a) nounwind ssp { +; CHECK-LABEL: define i32 @vmax_u8x8( +; CHECK-SAME: <8 x i8> [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> [[A]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: [[TMP:%.*]] = trunc i32 [[VMAXV_I]] to i8 +; CHECK-NEXT: [[TMP0:%.*]] = xor i8 [[TMP]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = and i8 -1, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0 +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 false, [[TMP2]] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label %[[RETURN:.*]], label %[[IF_THEN:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[CALL1:%.*]] = tail call i32 @bar() #[[ATTR3]] +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: br label %[[RETURN]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +entry: + %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a) nounwind + %tmp = trunc i32 %vmaxv.i to i8 + %tobool = icmp eq i8 %tmp, 0 + br i1 %tobool, label %return, label %if.then + +if.then: + %call1 = tail call i32 @bar() nounwind + br label %return + +return: + %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ] + ret i32 %retval.0 +} + +declare i32 @bar(...) + +define i32 @vmax_u4x16(<4 x i16> %a) nounwind ssp { +; CHECK-LABEL: define i32 @vmax_u4x16( +; CHECK-SAME: <4 x i16> [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> [[A]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP:%.*]] = trunc i32 [[VMAXV_I]] to i16 +; CHECK-NEXT: [[TMP0:%.*]] = xor i16 [[TMP]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = and i16 -1, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0 +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 false, [[TMP2]] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i16 [[TMP]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label %[[RETURN:.*]], label %[[IF_THEN:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[CALL1:%.*]] = tail call i32 @bar() #[[ATTR3]] +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: br label %[[RETURN]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +entry: + %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a) nounwind + %tmp = trunc i32 %vmaxv.i to i16 + %tobool = icmp eq i16 %tmp, 0 + br i1 %tobool, label %return, label %if.then + +if.then: + %call1 = tail call i32 @bar() nounwind + br label %return + +return: + %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ] + ret i32 %retval.0 +} + +define i32 @vmax_u8x16(<8 x i16> %a) nounwind ssp { +; CHECK-LABEL: define i32 @vmax_u8x16( +; CHECK-SAME: <8 x i16> [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> [[A]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP:%.*]] = trunc i32 [[VMAXV_I]] to i16 +; CHECK-NEXT: [[TMP0:%.*]] = xor i16 [[TMP]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = and i16 -1, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0 +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 false, [[TMP2]] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i16 [[TMP]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label %[[RETURN:.*]], label %[[IF_THEN:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[CALL1:%.*]] = tail call i32 @bar() #[[ATTR3]] +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: br label %[[RETURN]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +entry: + %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a) nounwind + %tmp = trunc i32 %vmaxv.i to i16 + %tobool = icmp eq i16 %tmp, 0 + br i1 %tobool, label %return, label %if.then + +if.then: + %call1 = tail call i32 @bar() nounwind + br label %return + +return: + %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ] + ret i32 %retval.0 +} + +define i32 @vmax_u16x8(<16 x i8> %a) nounwind ssp { +; CHECK-LABEL: define i32 @vmax_u16x8( +; CHECK-SAME: <16 x i8> [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> [[A]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP:%.*]] = trunc i32 [[VMAXV_I]] to i8 +; CHECK-NEXT: [[TMP0:%.*]] = xor i8 [[TMP]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = and i8 -1, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0 +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 false, [[TMP2]] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label %[[RETURN:.*]], label %[[IF_THEN:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[CALL1:%.*]] = tail call i32 @bar() #[[ATTR3]] +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: br label %[[RETURN]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +entry: + %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a) nounwind + %tmp = trunc i32 %vmaxv.i to i8 + %tobool = icmp eq i8 %tmp, 0 + br i1 %tobool, label %return, label %if.then + +if.then: + %call1 = tail call i32 @bar() nounwind + br label %return + +return: + %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ] + ret i32 %retval.0 +} + +define <8 x i8> @test_vmaxv_u8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) #0 { +; CHECK-LABEL: define <8 x i8> @test_vmaxv_u8_used_by_laneop( +; CHECK-SAME: <8 x i8> [[A1:%.*]], <8 x i8> [[A2:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1:![0-9]+]] +; CHECK: [[BB3]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> [[A2]]) +; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> [[TMP1]], i8 0, i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i8> [[A1]], i8 [[TMP6]], i32 3 +; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP7]] +; +entry: + %0 = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a2) + %1 = trunc i32 %0 to i8 + %2 = insertelement <8 x i8> %a1, i8 %1, i32 3 + ret <8 x i8> %2 +} + +define <4 x i16> @test_vmaxv_u16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) #0 { +; CHECK-LABEL: define <4 x i16> @test_vmaxv_u16_used_by_laneop( +; CHECK-SAME: <4 x i16> [[A1:%.*]], <4 x i16> [[A2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] +; CHECK: [[BB3]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> [[A2]]) +; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> [[TMP1]], i16 0, i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i16> [[A1]], i16 [[TMP6]], i32 3 +; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP7]] +; +entry: + %0 = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a2) + %1 = trunc i32 %0 to i16 + %2 = insertelement <4 x i16> %a1, i16 %1, i32 3 + ret <4 x i16> %2 +} + +define <2 x i32> @test_vmaxv_u32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) #0 { +; CHECK-LABEL: define <2 x i32> @test_vmaxv_u32_used_by_laneop( +; CHECK-SAME: <2 x i32> [[A1:%.*]], <2 x i32> [[A2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] +; CHECK: [[BB3]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> [[A2]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP1]], i32 0, i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[A1]], i32 [[TMP5]], i32 1 +; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP6]] +; +entry: + %0 = tail call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> %a2) + %1 = insertelement <2 x i32> %a1, i32 %0, i32 1 + ret <2 x i32> %1 +} + +define <16 x i8> @test_vmaxvq_u8_used_by_laneop(<16 x i8> %a1, <16 x i8> %a2) #0 { +; CHECK-LABEL: define <16 x i8> @test_vmaxvq_u8_used_by_laneop( +; CHECK-SAME: <16 x i8> [[A1:%.*]], <16 x i8> [[A2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] +; CHECK: [[BB3]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> [[A2]]) +; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> [[TMP1]], i8 0, i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[A1]], i8 [[TMP6]], i32 3 +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP7]] +; +entry: + %0 = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a2) + %1 = trunc i32 %0 to i8 + %2 = insertelement <16 x i8> %a1, i8 %1, i32 3 + ret <16 x i8> %2 +} + +define <8 x i16> @test_vmaxvq_u16_used_by_laneop(<8 x i16> %a1, <8 x i16> %a2) #0 { +; CHECK-LABEL: define <8 x i16> @test_vmaxvq_u16_used_by_laneop( +; CHECK-SAME: <8 x i16> [[A1:%.*]], <8 x i16> [[A2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] +; CHECK: [[BB3]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> [[A2]]) +; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> [[TMP1]], i16 0, i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[A1]], i16 [[TMP6]], i32 3 +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP7]] +; +entry: + %0 = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a2) + %1 = trunc i32 %0 to i16 + %2 = insertelement <8 x i16> %a1, i16 %1, i32 3 + ret <8 x i16> %2 +} + +define <4 x i32> @test_vmaxvq_u32_used_by_laneop(<4 x i32> %a1, <4 x i32> %a2) #0 { +; CHECK-LABEL: define <4 x i32> @test_vmaxvq_u32_used_by_laneop( +; CHECK-SAME: <4 x i32> [[A1:%.*]], <4 x i32> [[A2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] +; CHECK: [[BB3]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> [[A2]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP1]], i32 0, i32 3 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[A1]], i32 [[TMP5]], i32 3 +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP6]] +; +entry: + %0 = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a2) + %1 = insertelement <4 x i32> %a1, i32 %0, i32 3 + ret <4 x i32> %1 +} + +declare i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8>) nounwind readnone +declare i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16>) nounwind readnone +declare i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16>) nounwind readnone +declare i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8>) nounwind readnone +declare i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32>) nounwind readnone +declare i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32>) nounwind readnone + +attributes #0 = { sanitize_memory } diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-uminv.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-uminv.ll new file mode 100644 index 0000000000000..a754ceec94506 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-uminv.ll @@ -0,0 +1,341 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=msan -S | FileCheck %s +; +; Forked from llvm/test/CodeGen/AArch64/arm64-uminv.ll +; +; Handled suboptimally (visitInstruction): +; - llvm.aarch64.neon.uminv.i32.v16i8 +; - llvm.aarch64.neon.uminv.i32.v2i32 +; - llvm.aarch64.neon.uminv.i32.v4i16 +; - llvm.aarch64.neon.uminv.i32.v4i32 +; - llvm.aarch64.neon.uminv.i32.v8i16 +; - llvm.aarch64.neon.uminv.i32.v8i8 + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-android9001" + +define i32 @vmin_u8x8(<8 x i8> %a) nounwind ssp { +; CHECK-LABEL: define i32 @vmin_u8x8( +; CHECK-SAME: <8 x i8> [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[VMINV_I:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> [[A]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: [[TMP:%.*]] = trunc i32 [[VMINV_I]] to i8 +; CHECK-NEXT: [[TMP0:%.*]] = xor i8 [[TMP]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = and i8 -1, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0 +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 false, [[TMP2]] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label %[[RETURN:.*]], label %[[IF_THEN:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[CALL1:%.*]] = tail call i32 @bar() #[[ATTR3]] +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: br label %[[RETURN]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +entry: + %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a) nounwind + %tmp = trunc i32 %vminv.i to i8 + %tobool = icmp eq i8 %tmp, 0 + br i1 %tobool, label %return, label %if.then + +if.then: + %call1 = tail call i32 @bar() nounwind + br label %return + +return: + %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ] + ret i32 %retval.0 +} + +declare i32 @bar(...) + +define i32 @vmin_u4x16(<4 x i16> %a) nounwind ssp { +; CHECK-LABEL: define i32 @vmin_u4x16( +; CHECK-SAME: <4 x i16> [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[VMINV_I:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> [[A]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP:%.*]] = trunc i32 [[VMINV_I]] to i16 +; CHECK-NEXT: [[TMP0:%.*]] = xor i16 [[TMP]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = and i16 -1, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0 +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 false, [[TMP2]] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i16 [[TMP]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label %[[RETURN:.*]], label %[[IF_THEN:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[CALL1:%.*]] = tail call i32 @bar() #[[ATTR3]] +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: br label %[[RETURN]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +entry: + %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a) nounwind + %tmp = trunc i32 %vminv.i to i16 + %tobool = icmp eq i16 %tmp, 0 + br i1 %tobool, label %return, label %if.then + +if.then: + %call1 = tail call i32 @bar() nounwind + br label %return + +return: + %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ] + ret i32 %retval.0 +} + +define i32 @vmin_u8x16(<8 x i16> %a) nounwind ssp { +; CHECK-LABEL: define i32 @vmin_u8x16( +; CHECK-SAME: <8 x i16> [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[VMINV_I:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> [[A]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP:%.*]] = trunc i32 [[VMINV_I]] to i16 +; CHECK-NEXT: [[TMP0:%.*]] = xor i16 [[TMP]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = and i16 -1, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0 +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 false, [[TMP2]] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i16 [[TMP]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label %[[RETURN:.*]], label %[[IF_THEN:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[CALL1:%.*]] = tail call i32 @bar() #[[ATTR3]] +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: br label %[[RETURN]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +entry: + %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a) nounwind + %tmp = trunc i32 %vminv.i to i16 + %tobool = icmp eq i16 %tmp, 0 + br i1 %tobool, label %return, label %if.then + +if.then: + %call1 = tail call i32 @bar() nounwind + br label %return + +return: + %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ] + ret i32 %retval.0 +} + +define i32 @vmin_u16x8(<16 x i8> %a) nounwind ssp { +; CHECK-LABEL: define i32 @vmin_u16x8( +; CHECK-SAME: <16 x i8> [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[VMINV_I:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> [[A]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP:%.*]] = trunc i32 [[VMINV_I]] to i8 +; CHECK-NEXT: [[TMP0:%.*]] = xor i8 [[TMP]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = and i8 -1, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0 +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 false, [[TMP2]] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label %[[RETURN:.*]], label %[[IF_THEN:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[CALL1:%.*]] = tail call i32 @bar() #[[ATTR3]] +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: br label %[[RETURN]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], %[[IF_THEN]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +entry: + %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a) nounwind + %tmp = trunc i32 %vminv.i to i8 + %tobool = icmp eq i8 %tmp, 0 + br i1 %tobool, label %return, label %if.then + +if.then: + %call1 = tail call i32 @bar() nounwind + br label %return + +return: + %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ] + ret i32 %retval.0 +} + +define <8 x i8> @test_vminv_u8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) #0 { +; CHECK-LABEL: define <8 x i8> @test_vminv_u8_used_by_laneop( +; CHECK-SAME: <8 x i8> [[A1:%.*]], <8 x i8> [[A2:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1:![0-9]+]] +; CHECK: [[BB3]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> [[A2]]) +; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> [[TMP1]], i8 0, i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i8> [[A1]], i8 [[TMP6]], i32 3 +; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP7]] +; +entry: + %0 = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a2) + %1 = trunc i32 %0 to i8 + %2 = insertelement <8 x i8> %a1, i8 %1, i32 3 + ret <8 x i8> %2 +} + +define <4 x i16> @test_vminv_u16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) #0 { +; CHECK-LABEL: define <4 x i16> @test_vminv_u16_used_by_laneop( +; CHECK-SAME: <4 x i16> [[A1:%.*]], <4 x i16> [[A2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] +; CHECK: [[BB3]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> [[A2]]) +; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> [[TMP1]], i16 0, i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i16> [[A1]], i16 [[TMP6]], i32 3 +; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP7]] +; +entry: + %0 = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a2) + %1 = trunc i32 %0 to i16 + %2 = insertelement <4 x i16> %a1, i16 %1, i32 3 + ret <4 x i16> %2 +} + +define <2 x i32> @test_vminv_u32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) #0 { +; CHECK-LABEL: define <2 x i32> @test_vminv_u32_used_by_laneop( +; CHECK-SAME: <2 x i32> [[A1:%.*]], <2 x i32> [[A2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] +; CHECK: [[BB3]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> [[A2]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP1]], i32 0, i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[A1]], i32 [[TMP5]], i32 1 +; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP6]] +; +entry: + %0 = tail call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> %a2) + %1 = insertelement <2 x i32> %a1, i32 %0, i32 1 + ret <2 x i32> %1 +} + +define <16 x i8> @test_vminvq_u8_used_by_laneop(<16 x i8> %a1, <16 x i8> %a2) #0 { +; CHECK-LABEL: define <16 x i8> @test_vminvq_u8_used_by_laneop( +; CHECK-SAME: <16 x i8> [[A1:%.*]], <16 x i8> [[A2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] +; CHECK: [[BB3]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> [[A2]]) +; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> [[TMP1]], i8 0, i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[A1]], i8 [[TMP6]], i32 3 +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP7]] +; +entry: + %0 = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a2) + %1 = trunc i32 %0 to i8 + %2 = insertelement <16 x i8> %a1, i8 %1, i32 3 + ret <16 x i8> %2 +} + +define <8 x i16> @test_vminvq_u16_used_by_laneop(<8 x i16> %a1, <8 x i16> %a2) #0 { +; CHECK-LABEL: define <8 x i16> @test_vminvq_u16_used_by_laneop( +; CHECK-SAME: <8 x i16> [[A1:%.*]], <8 x i16> [[A2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] +; CHECK: [[BB3]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> [[A2]]) +; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> [[TMP1]], i16 0, i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[A1]], i16 [[TMP6]], i32 3 +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP7]] +; +entry: + %0 = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a2) + %1 = trunc i32 %0 to i16 + %2 = insertelement <8 x i16> %a1, i16 %1, i32 3 + ret <8 x i16> %2 +} + +define <4 x i32> @test_vminvq_u32_used_by_laneop(<4 x i32> %a1, <4 x i32> %a2) #0 { +; CHECK-LABEL: define <4 x i32> @test_vminvq_u32_used_by_laneop( +; CHECK-SAME: <4 x i32> [[A1:%.*]], <4 x i32> [[A2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] +; CHECK: [[BB3]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> [[A2]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP1]], i32 0, i32 3 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[A1]], i32 [[TMP5]], i32 3 +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP6]] +; +entry: + %0 = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a2) + %1 = insertelement <4 x i32> %a1, i32 %0, i32 3 + ret <4 x i32> %1 +} +declare i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8>) nounwind readnone +declare i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16>) nounwind readnone +declare i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16>) nounwind readnone +declare i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8>) nounwind readnone +declare i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32>) nounwind readnone +declare i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32>) nounwind readnone + +attributes #0 = { sanitize_memory }