From b0c5d6c36d6e4d9cc3fb59c80b4bf36fbd240a22 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Mon, 23 Jun 2025 11:59:45 -0400 Subject: [PATCH] [DirectX] make firstbitlow intrinsic use first argument instead of return for overload type. Fixes #144966. Easy fix: just add `dx_firstbitlow` to `DirectXTTIImpl::isTargetIntrinsicWithOverloadTypeAtArg`. --- .../DirectX/DirectXTargetTransformInfo.cpp | 1 + llvm/test/CodeGen/DirectX/firstbitlow.ll | 21 +++++++++++++++---- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp index e426c5249f930..84b2603b4c575 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp @@ -30,6 +30,7 @@ bool DirectXTTIImpl::isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, switch (ID) { case Intrinsic::dx_asdouble: case Intrinsic::dx_isinf: + case Intrinsic::dx_firstbitlow: return OpdIdx == 0; default: return OpdIdx == -1; diff --git a/llvm/test/CodeGen/DirectX/firstbitlow.ll b/llvm/test/CodeGen/DirectX/firstbitlow.ll index 884ec1164fc99..fe3786d3fbebf 100644 --- a/llvm/test/CodeGen/DirectX/firstbitlow.ll +++ b/llvm/test/CodeGen/DirectX/firstbitlow.ll @@ -41,7 +41,20 @@ entry: ret <4 x i32> %2 } -declare i32 @llvm.dx.firstbitlow.i16(i16) -declare i32 @llvm.dx.firstbitlow.i32(i32) -declare i32 @llvm.dx.firstbitlow.i64(i64) -declare <4 x i32> @llvm.dx.firstbitlow.v4i32(<4 x i32>) +define noundef <4 x i32> @test_firstbitlow_vec4_i16(<4 x i16> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x i16> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i16(i32 32, i16 [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x i16> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i16(i32 32, i16 [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x i16> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i16(i32 32,
i16 [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x i16> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i16(i32 32, i16 [[ee3]]) + ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie3]], i64 3 + %2 = call <4 x i32> @llvm.dx.firstbitlow.v4i16(<4 x i16> %a) + ret <4 x i32> %2 +}