From e553ae29ef612e715c15cec364362937ed180f92 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Tue, 4 Mar 2025 20:17:52 +0000 Subject: [PATCH 1/2] [msan][NFC] Add expand-experimental-reductions.ll Forked from llvm/test/CodeGen/Generic/expand-experimental-reductions.ll Handled suboptimally by visitInstruction: - llvm.vector.reduce.smax - llvm.vector.reduce.smin - llvm.vector.reduce.umax - llvm.vector.reduce.umin - llvm.vector.reduce.fmax - llvm.vector.reduce.fmin --- .../expand-experimental-reductions.ll | 417 ++++++++++++++++++ 1 file changed, 417 insertions(+) create mode 100644 llvm/test/Instrumentation/MemorySanitizer/expand-experimental-reductions.ll diff --git a/llvm/test/Instrumentation/MemorySanitizer/expand-experimental-reductions.ll b/llvm/test/Instrumentation/MemorySanitizer/expand-experimental-reductions.ll new file mode 100644 index 0000000000000..473b88b1c9fa1 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/expand-experimental-reductions.ll @@ -0,0 +1,417 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=msan -S | FileCheck %s +; +; Forked from llvm/test/CodeGen/Generic/expand-experimental-reductions.ll +; +; Handled suboptimally by visitInstruction: +; - llvm.vector.reduce.smax +; - llvm.vector.reduce.smin +; - llvm.vector.reduce.umax +; - llvm.vector.reduce.umin +; - llvm.vector.reduce.fmax +; - llvm.vector.reduce.fmin + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>) + +declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>) +declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>) + +declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>) + +declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>) +declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>) + +declare i8 @llvm.vector.reduce.and.i8.v3i8(<3 x i8>) + +define i64 @add_i64(<2 x i64> %vec) #0 { +; CHECK-LABEL: define i64 @add_i64( +; CHECK-SAME: <2 x i64> [[VEC:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP0]]) +; CHECK-NEXT: [[R:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[VEC]]) +; CHECK-NEXT: store i64 [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[R]] +; +entry: + %r = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %vec) + ret i64 %r +} + +define i64 @mul_i64(<2 x i64> %vec) #0 { +; CHECK-LABEL: define i64 @mul_i64( +; CHECK-SAME: <2 x i64> [[VEC:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP0]]) +; CHECK-NEXT: [[R:%.*]] = call i64 
@llvm.vector.reduce.mul.v2i64(<2 x i64> [[VEC]]) +; CHECK-NEXT: store i64 [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[R]] +; +entry: + %r = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %vec) + ret i64 %r +} + +define i64 @and_i64(<2 x i64> %vec) #0 { +; CHECK-LABEL: define i64 @and_i64( +; CHECK-SAME: <2 x i64> [[VEC:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i64> [[VEC]], [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP0]]) +; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[R:%.*]] = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> [[VEC]]) +; CHECK-NEXT: store i64 [[TMP4]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[R]] +; +entry: + %r = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %vec) + ret i64 %r +} + +define i64 @or_i64(<2 x i64> %vec) #0 { +; CHECK-LABEL: define i64 @or_i64( +; CHECK-SAME: <2 x i64> [[VEC:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i64> [[VEC]], splat (i64 -1) +; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i64> [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP0]]) +; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[R:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[VEC]]) +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[R]] +; +entry: + %r = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %vec) + ret i64 %r +} + +define i64 @xor_i64(<2 x i64> %vec) #0 { +; CHECK-LABEL: define i64 @xor_i64( +; CHECK-SAME: <2 x i64> [[VEC:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP0]]) +; CHECK-NEXT: [[R:%.*]] = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> [[VEC]]) +; CHECK-NEXT: store i64 [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[R]] +; +entry: + %r = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %vec) + ret i64 %r +} + +define float @fadd_f32(<4 x float> %vec) #0 { +; CHECK-LABEL: define float @fadd_f32( +; CHECK-SAME: <4 x float> [[VEC:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = or i32 0, [[TMP1]] +; CHECK-NEXT: [[R:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[VEC]]) +; CHECK-NEXT: store i32 [[TMP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret float [[R]] +; +entry: + %r = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %vec) + ret float %r +} + +define float @fadd_f32_accum(float %accum, <4 x float> %vec) #0 { +; CHECK-LABEL: define float @fadd_f32_accum( +; CHECK-SAME: float [[ACCUM:%.*]], <4 x float> 
[[VEC:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP2]] +; CHECK-NEXT: [[R:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[ACCUM]], <4 x float> [[VEC]]) +; CHECK-NEXT: store i32 [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret float [[R]] +; +entry: + %r = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %accum, <4 x float> %vec) + ret float %r +} + +define float @fadd_f32_strict(<4 x float> %vec) #0 { +; CHECK-LABEL: define float @fadd_f32_strict( +; CHECK-SAME: <4 x float> [[VEC:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = or i32 -1, [[TMP1]] +; CHECK-NEXT: [[R:%.*]] = call float @llvm.vector.reduce.fadd.v4f32(float undef, <4 x float> [[VEC]]) +; CHECK-NEXT: store i32 [[TMP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret float [[R]] +; +entry: + %r = call float @llvm.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %vec) + ret float %r +} + +define float @fadd_f32_strict_accum(float %accum, <4 x float> %vec) #0 { +; CHECK-LABEL: define float @fadd_f32_strict_accum( +; CHECK-SAME: float [[ACCUM:%.*]], <4 x float> [[VEC:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP2]] +; CHECK-NEXT: [[R:%.*]] = call float @llvm.vector.reduce.fadd.v4f32(float [[ACCUM]], <4 x float> [[VEC]]) +; CHECK-NEXT: store i32 [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret float [[R]] +; +entry: + %r = call float @llvm.vector.reduce.fadd.f32.v4f32(float %accum, <4 x float> %vec) + ret float %r +} + +define float @fmul_f32(<4 x float> %vec) #0 { +; CHECK-LABEL: define float @fmul_f32( +; CHECK-SAME: <4 x float> [[VEC:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = or i32 0, [[TMP1]] +; CHECK-NEXT: [[R:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[VEC]]) +; CHECK-NEXT: store i32 [[TMP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret float [[R]] +; +entry: + %r = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %vec) + ret float %r +} + +define float @fmul_f32_accum(float %accum, <4 x float> %vec) #0 { +; CHECK-LABEL: define float @fmul_f32_accum( +; CHECK-SAME: float [[ACCUM:%.*]], <4 x float> [[VEC:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: 
[[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP2]] +; CHECK-NEXT: [[R:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float [[ACCUM]], <4 x float> [[VEC]]) +; CHECK-NEXT: store i32 [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret float [[R]] +; +entry: + %r = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float %accum, <4 x float> %vec) + ret float %r +} + +define float @fmul_f32_strict(<4 x float> %vec) #0 { +; CHECK-LABEL: define float @fmul_f32_strict( +; CHECK-SAME: <4 x float> [[VEC:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = or i32 -1, [[TMP1]] +; CHECK-NEXT: [[R:%.*]] = call float @llvm.vector.reduce.fmul.v4f32(float undef, <4 x float> [[VEC]]) +; CHECK-NEXT: store i32 [[TMP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret float [[R]] +; +entry: + %r = call float @llvm.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %vec) + ret float %r +} + +define float @fmul_f32_strict_accum(float %accum, <4 x float> %vec) #0 { +; CHECK-LABEL: define float @fmul_f32_strict_accum( +; CHECK-SAME: float [[ACCUM:%.*]], <4 x float> [[VEC:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP2]] +; CHECK-NEXT: [[R:%.*]] = call float @llvm.vector.reduce.fmul.v4f32(float [[ACCUM]], <4 x float> [[VEC]]) +; CHECK-NEXT: store i32 [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret float [[R]] +; +entry: + %r = call float @llvm.vector.reduce.fmul.f32.v4f32(float %accum, <4 x float> %vec) + ret float %r +} + +define i64 @smax_i64(<2 x i64> %vec) #0 { +; CHECK-LABEL: define i64 @smax_i64( +; CHECK-SAME: <2 x i64> [[VEC:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1:![0-9]+]] +; CHECK: [[BB2]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: [[BB3]]: +; CHECK-NEXT: [[R:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[VEC]]) +; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[R]] +; +entry: + %r = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %vec) + ret i64 %r +} + +define i64 @smin_i64(<2 x i64> %vec) #0 { +; CHECK-LABEL: define i64 @smin_i64( +; CHECK-SAME: <2 x i64> [[VEC:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] 
= bitcast <2 x i64> [[TMP0]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] +; CHECK: [[BB2]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: [[BB3]]: +; CHECK-NEXT: [[R:%.*]] = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> [[VEC]]) +; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[R]] +; +entry: + %r = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %vec) + ret i64 %r +} + +define i64 @umax_i64(<2 x i64> %vec) #0 { +; CHECK-LABEL: define i64 @umax_i64( +; CHECK-SAME: <2 x i64> [[VEC:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] +; CHECK: [[BB2]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: [[BB3]]: +; CHECK-NEXT: [[R:%.*]] = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> [[VEC]]) +; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[R]] +; +entry: + %r = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %vec) + ret i64 %r +} + +define i64 @umin_i64(<2 x i64> %vec) #0 { +; CHECK-LABEL: define i64 @umin_i64( +; CHECK-SAME: <2 x i64> [[VEC:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] +; CHECK: [[BB2]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: [[BB3]]: +; CHECK-NEXT: [[R:%.*]] = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> [[VEC]]) +; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[R]] +; +entry: + %r = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %vec) + ret i64 %r +} + + +define double @fmax_f64(<2 x double> %vec) #0 { +; CHECK-LABEL: define double @fmax_f64( +; CHECK-SAME: <2 x double> [[VEC:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] +; CHECK: [[BB2]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: [[BB3]]: +; CHECK-NEXT: [[R:%.*]] = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> [[VEC]]) +; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret double [[R]] +; +entry: + %r = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %vec) + ret double %r +} + + +define double @fmin_f64(<2 x double> %vec) #0 { +; CHECK-LABEL: define double @fmin_f64( +; CHECK-SAME: <2 x double> [[VEC:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void 
@llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] +; CHECK: [[BB2]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: [[BB3]]: +; CHECK-NEXT: [[R:%.*]] = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> [[VEC]]) +; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret double [[R]] +; +entry: + %r = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %vec) + ret double %r +} + + +define i8 @test_v3i8(<3 x i8> %a) nounwind #0 { +; CHECK-LABEL: define i8 @test_v3i8( +; CHECK-SAME: <3 x i8> [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = or <3 x i8> [[A]], [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.or.v3i8(<3 x i8> [[TMP0]]) +; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[B:%.*]] = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> [[A]]) +; CHECK-NEXT: store i8 [[TMP4]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i8 [[B]] +; +entry: + %b = call i8 @llvm.vector.reduce.and.i8.v3i8(<3 x i8> %a) + ret i8 %b +} + +attributes #0 = { sanitize_memory } From bd052b7baa907002de71df024f6b491e8063b8d6 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Tue, 4 Mar 2025 20:34:26 +0000 Subject: [PATCH 2/2] Replace undef with param --- .../expand-experimental-reductions.ll | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/llvm/test/Instrumentation/MemorySanitizer/expand-experimental-reductions.ll b/llvm/test/Instrumentation/MemorySanitizer/expand-experimental-reductions.ll index 473b88b1c9fa1..7a4b433fc8be6 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/expand-experimental-reductions.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/expand-experimental-reductions.ll @@ -155,20 +155,21 @@ entry: ret float %r } -define float @fadd_f32_strict(<4 x float> %vec) #0 { +define float @fadd_f32_strict(float %param, <4 x float> %vec) #0 { ; CHECK-LABEL: define float @fadd_f32_strict( -; CHECK-SAME: <4 x float> [[VEC:%.*]]) #[[ATTR1]] { +; CHECK-SAME: float [[PARAM:%.*]], <4 x float> [[VEC:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = or i32 -1, [[TMP1]] -; CHECK-NEXT: [[R:%.*]] = call float @llvm.vector.reduce.fadd.v4f32(float undef, <4 x float> [[VEC]]) +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[R:%.*]] = call float @llvm.vector.reduce.fadd.v4f32(float [[PARAM]], <4 x float> [[VEC]]) ; CHECK-NEXT: store i32 [[TMP2]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret float [[R]] ; entry: - %r = call float @llvm.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %vec) + %r = call float @llvm.vector.reduce.fadd.f32.v4f32(float %param, <4 x float> %vec) 
ret float %r } @@ -225,20 +226,21 @@ entry: ret float %r } -define float @fmul_f32_strict(<4 x float> %vec) #0 { +define float @fmul_f32_strict(float %param, <4 x float> %vec) #0 { ; CHECK-LABEL: define float @fmul_f32_strict( -; CHECK-SAME: <4 x float> [[VEC:%.*]]) #[[ATTR1]] { +; CHECK-SAME: float [[PARAM:%.*]], <4 x float> [[VEC:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = or i32 -1, [[TMP1]] -; CHECK-NEXT: [[R:%.*]] = call float @llvm.vector.reduce.fmul.v4f32(float undef, <4 x float> [[VEC]]) +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[R:%.*]] = call float @llvm.vector.reduce.fmul.v4f32(float [[PARAM]], <4 x float> [[VEC]]) ; CHECK-NEXT: store i32 [[TMP2]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret float [[R]] ; entry: - %r = call float @llvm.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %vec) + %r = call float @llvm.vector.reduce.fmul.f32.v4f32(float %param, <4 x float> %vec) ret float %r } @@ -415,3 +417,6 @@ entry: } attributes #0 = { sanitize_memory } +;. +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575} +;.
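
A note on "handled suboptimally by visitInstruction": for the smax/smin/umax/umin/fmax/fmin cases in the test above, MSan falls back to its generic handling, which eagerly checks the whole operand shadow (bitcast to i128, compare against zero, branch to __msan_warning_noreturn) and then stores a clean (zero) return shadow. The add/mul/xor reductions, by contrast, propagate the shadow by OR-reducing it into the return shadow. The hand-written IR below is a minimal sketch of what that propagation scheme would look like if applied to smax; it is illustrative only and not emitted by this patch. The function name is made up, and the TLS shadow globals are assumed to follow MSan's usual declarations.

; Sketch only: mirrors the OR-reduction shadow propagation that the add/mul/xor
; tests above already exhibit, applied (hypothetically) to smax.
@__msan_param_tls = external thread_local(initialexec) global [100 x i64]
@__msan_retval_tls = external thread_local(initialexec) global [100 x i64]

declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)

define i64 @smax_i64_sketch(<2 x i64> %vec) sanitize_memory {
entry:
  ; Load the parameter shadow and OR the lanes together, so a poisoned bit in
  ; any lane poisons the corresponding bit of the result shadow.
  %s = load <2 x i64>, ptr @__msan_param_tls, align 8
  %sr = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %s)
  %r = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %vec)
  ; Propagate the reduced shadow instead of checking it eagerly and returning
  ; a clean shadow.
  store i64 %sr, ptr @__msan_retval_tls, align 8
  ret i64 %r
}

With propagation like this, the reduction would no longer report at the call site; an uninitialized lane would only be reported if the reduced result is later used in a way MSan checks (a branch, a memory access, and so on), which matches how the other reductions in this test behave.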