diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index e432f0cb7d897..47f3fd20148f7 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -325,7 +325,7 @@ Value *AA::getWithType(Value &V, Type &Ty) { if (isa<UndefValue>(V)) return UndefValue::get(&Ty); if (auto *C = dyn_cast<Constant>(&V)) { - if (C->isNullValue()) + if (C->isNullValue() && !Ty.isPtrOrPtrVectorTy()) return Constant::getNullValue(&Ty); if (C->getType()->isPointerTy() && Ty.isPointerTy()) return ConstantExpr::getPointerCast(C, &Ty); diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll index f176f34f84736..a6a0a9a3c9015 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll @@ -217,7 +217,7 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #0 { ; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast: ; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} -; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} +; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s{{[0-9]+}} ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}} ; HSA: flat_store_dword v[[[LO]]:[[HI]]], v[[K]] define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 { @@ -260,7 +260,7 @@ define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 { ; FIXME: Shouldn't need to enable queue ptr ; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast: ; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} -; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} +; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s{{[0-9]+}} ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}} ; HSA: flat_store_dword v[[[LO]]:[[HI]]], v[[K]] define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 { diff --git a/llvm/test/Transforms/Attributor/AMDGPU/do-not-replace-addrspacecast-with-constantpointernull.ll b/llvm/test/Transforms/Attributor/AMDGPU/do-not-replace-addrspacecast-with-constantpointernull.ll new 
file mode 100644 index 0000000000000..fb4153bac808e --- /dev/null +++ b/llvm/test/Transforms/Attributor/AMDGPU/do-not-replace-addrspacecast-with-constantpointernull.ll @@ -0,0 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=attributor %s -o - | FileCheck %s + +define i32 @addrspacecast_ptr(ptr %p0, ptr addrspace(5) %p5) { +; CHECK-LABEL: define i32 @addrspacecast_ptr( +; CHECK-SAME: ptr nofree readonly captures(none) [[P0:%.*]], ptr addrspace(5) nofree readonly [[P5:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ICMP:%.*]] = icmp eq ptr addrspace(5) [[P5]], addrspacecast (ptr null to ptr addrspace(5)) +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[ICMP]], ptr [[P0]], ptr null +; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[SELECT]], align 4 +; CHECK-NEXT: ret i32 [[LOAD]] +; + %icmp = icmp eq ptr addrspace(5) %p5, addrspacecast (ptr null to ptr addrspace(5)) + %select = select i1 %icmp, ptr %p0, ptr null + %load = load i32, ptr %select, align 4 + ret i32 %load +} + +define i32 @vec_addrspacecast_ptr(ptr %p0, ptr %p1, <2 x ptr addrspace(5)> %ptrvec) { +; CHECK-LABEL: define i32 @vec_addrspacecast_ptr( +; CHECK-SAME: ptr nofree readonly captures(none) [[P0:%.*]], ptr nofree noundef nonnull readonly align 16 captures(none) dereferenceable(8) [[P1:%.*]], <2 x ptr addrspace(5)> [[PTRVEC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOADVEC:%.*]] = load <2 x ptr addrspace(5)>, ptr [[P1]], align 16 +; CHECK-NEXT: [[ICMPVEC:%.*]] = icmp eq <2 x ptr addrspace(5)> [[LOADVEC]], +; CHECK-NEXT: [[ICMP:%.*]] = extractelement <2 x i1> [[ICMPVEC]], i32 1 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[ICMP]], ptr [[P0]], ptr null +; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[SELECT]], align 4 +; CHECK-NEXT: ret i32 [[LOAD]] +; + %loadvec = load <2 x ptr addrspace(5)>, ptr %p1, align 16 + %icmpvec = icmp eq <2 x ptr addrspace(5)> %loadvec, + %icmp = extractelement <2 x i1> %icmpvec, i32 1 + 
%select = select i1 %icmp, ptr %p0, ptr null + %load = load i32, ptr %select, align 4 + ret i32 %load +} + +define i32 @addrspacecast_vec_as1_ptr(ptr %p0, ptr %p1, <2 x ptr addrspace(5)> %ptrvec) { +; CHECK-LABEL: define i32 @addrspacecast_vec_as1_ptr( +; CHECK-SAME: ptr nofree readonly captures(none) [[P0:%.*]], ptr nofree noundef nonnull readonly align 16 captures(none) dereferenceable(8) [[P1:%.*]], <2 x ptr addrspace(5)> [[PTRVEC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOADVEC:%.*]] = load <2 x ptr addrspace(5)>, ptr [[P1]], align 16 +; CHECK-NEXT: [[ICMPVEC:%.*]] = icmp eq <2 x ptr addrspace(5)> [[LOADVEC]], +; CHECK-NEXT: [[ICMP:%.*]] = extractelement <2 x i1> [[ICMPVEC]], i32 1 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[ICMP]], ptr [[P0]], ptr null +; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[SELECT]], align 4 +; CHECK-NEXT: ret i32 [[LOAD]] +; + %loadvec = load <2 x ptr addrspace(5)>, ptr %p1, align 16 + %icmpvec = icmp eq <2 x ptr addrspace(5)> %loadvec, addrspacecast (<2 x ptr addrspace(1)> zeroinitializer to <2 x ptr addrspace(5)>) + %icmp = extractelement <2 x i1> %icmpvec, i32 1 + %select = select i1 %icmp, ptr %p0, ptr null + %load = load i32, ptr %select, align 4 + ret i32 %load +} + +define i32 @addrspacecast_vec_ptr(ptr %p0, ptr %p1, <2 x ptr addrspace(5)> %ptrvec) { +; CHECK-LABEL: define i32 @addrspacecast_vec_ptr( +; CHECK-SAME: ptr nofree readonly captures(none) [[P0:%.*]], ptr nofree noundef nonnull readonly align 16 captures(none) dereferenceable(8) [[P1:%.*]], <2 x ptr addrspace(5)> [[PTRVEC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOADVEC:%.*]] = load <2 x ptr addrspace(5)>, ptr [[P1]], align 16 +; CHECK-NEXT: [[ICMPVEC:%.*]] = icmp eq <2 x ptr addrspace(5)> [[LOADVEC]], +; CHECK-NEXT: [[ICMP:%.*]] = extractelement <2 x i1> [[ICMPVEC]], i32 1 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[ICMP]], ptr [[P0]], ptr null +; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[SELECT]], align 4 +; CHECK-NEXT: ret i32 [[LOAD]] +; + %loadvec = load <2 x ptr 
addrspace(5)>, ptr %p1, align 16 + %icmpvec = icmp eq <2 x ptr addrspace(5)> %loadvec, addrspacecast (<2 x ptr> zeroinitializer to <2 x ptr addrspace(5)>) + %icmp = extractelement <2 x i1> %icmpvec, i32 1 + %select = select i1 %icmp, ptr %p0, ptr null + %load = load i32, ptr %select, align 4 + ret i32 %load +} + diff --git a/llvm/test/Transforms/Attributor/AMDGPU/lit.local.cfg b/llvm/test/Transforms/Attributor/AMDGPU/lit.local.cfg new file mode 100644 index 0000000000000..7c492428aec76 --- /dev/null +++ b/llvm/test/Transforms/Attributor/AMDGPU/lit.local.cfg @@ -0,0 +1,2 @@ +if not "AMDGPU" in config.root.targets: + config.unsupported = True