diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp index 84b2603b4c575..ffd900c68893f 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp @@ -31,6 +31,8 @@ bool DirectXTTIImpl::isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, case Intrinsic::dx_asdouble: case Intrinsic::dx_isinf: case Intrinsic::dx_firstbitlow: + case Intrinsic::dx_firstbituhigh: + case Intrinsic::dx_firstbitshigh: return OpdIdx == 0; default: return OpdIdx == -1; diff --git a/llvm/test/CodeGen/DirectX/firstbithigh.ll b/llvm/test/CodeGen/DirectX/firstbithigh.ll index 794b0f20a0269..bf9efd30d9b2f 100644 --- a/llvm/test/CodeGen/DirectX/firstbithigh.ll +++ b/llvm/test/CodeGen/DirectX/firstbithigh.ll @@ -4,42 +4,42 @@ define noundef i32 @test_firstbithigh_ushort(i16 noundef %a) { entry: -; CHECK: call i32 @dx.op.unaryBits.i16(i32 33, i16 %{{.*}}) #[[#ATTR:]] +; CHECK: call i32 @dx.op.unaryBits.i16(i32 33, i16 %{{.*}}) %elt.firstbithigh = call i32 @llvm.dx.firstbituhigh.i16(i16 %a) ret i32 %elt.firstbithigh } define noundef i32 @test_firstbithigh_short(i16 noundef %a) { entry: -; CHECK: call i32 @dx.op.unaryBits.i16(i32 34, i16 %{{.*}}) #[[#ATTR]] +; CHECK: call i32 @dx.op.unaryBits.i16(i32 34, i16 %{{.*}}) %elt.firstbithigh = call i32 @llvm.dx.firstbitshigh.i16(i16 %a) ret i32 %elt.firstbithigh } define noundef i32 @test_firstbithigh_uint(i32 noundef %a) { entry: -; CHECK: call i32 @dx.op.unaryBits.i32(i32 33, i32 %{{.*}}) #[[#ATTR]] +; CHECK: call i32 @dx.op.unaryBits.i32(i32 33, i32 %{{.*}}) %elt.firstbithigh = call i32 @llvm.dx.firstbituhigh.i32(i32 %a) ret i32 %elt.firstbithigh } define noundef i32 @test_firstbithigh_int(i32 noundef %a) { entry: -; CHECK: call i32 @dx.op.unaryBits.i32(i32 34, i32 %{{.*}}) #[[#ATTR]] +; CHECK: call i32 @dx.op.unaryBits.i32(i32 34, i32 %{{.*}}) %elt.firstbithigh = call i32 @llvm.dx.firstbitshigh.i32(i32 %a) ret i32 %elt.firstbithigh } define noundef i32 @test_firstbithigh_ulong(i64 noundef %a) { entry: -; CHECK: call i32 @dx.op.unaryBits.i64(i32 33, i64 %{{.*}}) #[[#ATTR]] +; CHECK: call i32 @dx.op.unaryBits.i64(i32 33, i64 %{{.*}}) %elt.firstbithigh = call i32 @llvm.dx.firstbituhigh.i64(i64 %a) ret i32 %elt.firstbithigh } define noundef i32 @test_firstbithigh_long(i64 noundef %a) { entry: -; CHECK: call i32 @dx.op.unaryBits.i64(i32 34, i64 %{{.*}}) #[[#ATTR]] +; CHECK: call i32 @dx.op.unaryBits.i64(i32 34, i64 %{{.*}}) %elt.firstbithigh = call i32 @llvm.dx.firstbitshigh.i64(i64 %a) ret i32 %elt.firstbithigh } @@ -47,17 +47,18 @@ entry: define noundef <4 x i32> @test_firstbituhigh_vec4_i32(<4 x i32> noundef %a) { entry: ; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0 - ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee0]]) #[[#ATTR]] + ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee0]]) ; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1 - ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee1]]) #[[#ATTR]] + ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee1]]) ; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2 - ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee2]]) #[[#ATTR]] + ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee2]]) ; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3 - ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee3]]) #[[#ATTR]] - ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0 - ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1 - ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2 - ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie3]], i64 3 + ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee3]]) + ; CHECK: [[rt0:%.*]] = insertelement <4 x i32> poison, i32 [[ie0]], i64 0 + ; CHECK: [[rt1:%.*]] = insertelement <4 x i32> [[rt0]], i32 [[ie1]], i64 1 + ; CHECK: [[rt2:%.*]] = insertelement <4 x i32> [[rt1]], i32 [[ie2]], i64 2 + ; CHECK: [[rt3:%.*]] = insertelement <4 x i32> [[rt2]], i32 [[ie3]], i64 3 + ; CHECK: ret <4 x i32> [[rt3]] %2 = call <4 x i32> @llvm.dx.firstbituhigh.v4i32(<4 x i32> %a) ret <4 x i32> %2 } @@ -65,29 +66,56 @@ entry: define noundef <4 x i32> @test_firstbitshigh_vec4_i32(<4 x i32> noundef %a) { entry: ; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0 - ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee0]]) #[[#ATTR]] + ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee0]]) ; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1 - ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee1]]) #[[#ATTR]] + ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee1]]) ; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2 - ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee2]]) #[[#ATTR]] + ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee2]]) ; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3 - ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee3]]) #[[#ATTR]] - ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0 - ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1 - ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2 - ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie3]], i64 3 + ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee3]]) + ; CHECK: [[rt0:%.*]] = insertelement <4 x i32> poison, i32 [[ie0]], i64 0 + ; CHECK: [[rt1:%.*]] = insertelement <4 x i32> [[rt0]], i32 [[ie1]], i64 1 + ; CHECK: [[rt2:%.*]] = insertelement <4 x i32> [[rt1]], i32 [[ie2]], i64 2 + ; CHECK: [[rt3:%.*]] = insertelement <4 x i32> [[rt2]], i32 [[ie3]], i64 3 + ; CHECK: ret <4 x i32> [[rt3]] %2 = call <4 x i32> @llvm.dx.firstbitshigh.v4i32(<4 x i32> %a) ret <4 x i32> %2 } -; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}} - -declare i32 @llvm.dx.firstbituhigh.i16(i16) -declare i32 @llvm.dx.firstbituhigh.i32(i32) -declare i32 @llvm.dx.firstbituhigh.i64(i64) -declare <4 x i32> @llvm.dx.firstbituhigh.v4i32(<4 x i32>) +define noundef <4 x i32> @test_firstbituhigh_vec4_i16(<4 x i16> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x i16> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i16(i32 33, i16 [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x i16> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i16(i32 33, i16 [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x i16> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i16(i32 33, i16 [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x i16> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i16(i32 33, i16 [[ee3]]) + ; CHECK: [[rt0:%.*]] = insertelement <4 x i32> poison, i32 [[ie0]], i64 0 + ; CHECK: [[rt1:%.*]] = insertelement <4 x i32> [[rt0]], i32 [[ie1]], i64 1 + ; CHECK: [[rt2:%.*]] = insertelement <4 x i32> [[rt1]], i32 [[ie2]], i64 2 + ; CHECK: [[rt3:%.*]] = insertelement <4 x i32> [[rt2]], i32 [[ie3]], i64 3 + ; CHECK: ret <4 x i32> [[rt3]] + %2 = call <4 x i32> @llvm.dx.firstbituhigh.v4i16(<4 x i16> %a) + ret <4 x i32> %2 +} -declare i32 @llvm.dx.firstbitshigh.i16(i16) -declare i32 @llvm.dx.firstbitshigh.i32(i32) -declare i32 @llvm.dx.firstbitshigh.i64(i64) -declare <4 x i32> @llvm.dx.firstbitshigh.v4i32(<4 x i32>) +define noundef <4 x i32> @test_firstbitshigh_vec4_i16(<4 x i16> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x i16> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i16(i32 34, i16 [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x i16> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i16(i32 34, i16 [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x i16> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i16(i32 34, i16 [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x i16> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i16(i32 34, i16 [[ee3]]) + ; CHECK: [[rt0:%.*]] = insertelement <4 x i32> poison, i32 [[ie0]], i64 0 + ; CHECK: [[rt1:%.*]] = insertelement <4 x i32> [[rt0]], i32 [[ie1]], i64 1 + ; CHECK: [[rt2:%.*]] = insertelement <4 x i32> [[rt1]], i32 [[ie2]], i64 2 + ; CHECK: [[rt3:%.*]] = insertelement <4 x i32> [[rt2]], i32 [[ie3]], i64 3 + ; CHECK: ret <4 x i32> [[rt3]] + %2 = call <4 x i32> @llvm.dx.firstbitshigh.v4i16(<4 x i16> %a) + ret <4 x i32> %2 +} diff --git a/llvm/test/CodeGen/DirectX/firstbitlow.ll b/llvm/test/CodeGen/DirectX/firstbitlow.ll index fe3786d3fbebf..7f5f0b2dd7ed7 100644 --- a/llvm/test/CodeGen/DirectX/firstbitlow.ll +++ b/llvm/test/CodeGen/DirectX/firstbitlow.ll @@ -33,10 +33,11 @@ entry: ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 32, i32 [[ee2]]) ; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3 ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 32, i32 [[ee3]]) - ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0 - ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1 - ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2 - ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie3]], i64 3 + ; CHECK: [[rt0:%.*]] = insertelement <4 x i32> poison, i32 [[ie0]], i64 0 + ; CHECK: [[rt1:%.*]] = insertelement <4 x i32> [[rt0]], i32 [[ie1]], i64 1 + ; CHECK: [[rt2:%.*]] = insertelement <4 x i32> [[rt1]], i32 [[ie2]], i64 2 + ; CHECK: [[rt3:%.*]] = insertelement <4 x i32> [[rt2]], i32 [[ie3]], i64 3 + ; CHECK: ret <4 x i32> [[rt3]] %2 = call <4 x i32> @llvm.dx.firstbitlow.v4i32(<4 x i32> %a) ret <4 x i32> %2 } @@ -51,10 +52,11 @@ entry: ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i16(i32 32, i16 [[ee2]]) ; CHECK: [[ee3:%.*]] = extractelement <4 x i16> %a, i64 3 ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i16(i32 32, i16 [[ee3]]) - ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0 - ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1 - ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2 - ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie3]], i64 3 + ; CHECK: [[rt0:%.*]] = insertelement <4 x i32> poison, i32 [[ie0]], i64 0 + ; CHECK: [[rt1:%.*]] = insertelement <4 x i32> [[rt0]], i32 [[ie1]], i64 1 + ; CHECK: [[rt2:%.*]] = insertelement <4 x i32> [[rt1]], i32 [[ie2]], i64 2 + ; CHECK: [[rt3:%.*]] = insertelement <4 x i32> [[rt2]], i32 [[ie3]], i64 3 + ; CHECK: ret <4 x i32> [[rt3]] %2 = call <4 x i32> @llvm.dx.firstbitlow.v4i16(<4 x i16> %a) ret <4 x i32> %2 }