diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 830c15249582c..87b885447cc02 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -401,6 +401,13 @@ bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
     II->setCalledFunction(NewDecl);
     return true;
   }
+  case Intrinsic::prefetch: {
+    Function *NewDecl =
+        Intrinsic::getDeclaration(M, II->getIntrinsicID(), {NewV->getType()});
+    II->setArgOperand(0, NewV);
+    II->setCalledFunction(NewDecl);
+    return true;
+  }
   default: {
     Value *Rewrite = TTI->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
     if (!Rewrite)
@@ -423,6 +430,7 @@ void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
                                                  PostorderStack, Visited);
     break;
   case Intrinsic::masked_gather:
+  case Intrinsic::prefetch:
     appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
                                                  PostorderStack, Visited);
     break;
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/prefetch.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/prefetch.ll
new file mode 100644
index 0000000000000..e2c12c4c37141
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/prefetch.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s
+
+define void @prefetch_shared_to_flat(ptr addrspace(3) %group.ptr) {
+; CHECK-LABEL: define void @prefetch_shared_to_flat(
+; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]]) {
+; CHECK-NEXT:    tail call void @llvm.prefetch.p3(ptr addrspace(3) [[GROUP_PTR]], i32 0, i32 0, i32 1)
+; CHECK-NEXT:    ret void
+;
+  %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr
+  tail call void @llvm.prefetch.p0(ptr %cast, i32 0, i32 0, i32 1)
+  ret void
+}
+
+define void @prefetch_global_to_flat(ptr addrspace(1) %global.ptr) {
+; CHECK-LABEL: define void @prefetch_global_to_flat(
+; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]]) {
+; CHECK-NEXT:    tail call void @llvm.prefetch.p1(ptr addrspace(1) [[GLOBAL_PTR]], i32 0, i32 0, i32 1)
+; CHECK-NEXT:    ret void
+;
+  %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr
+  tail call void @llvm.prefetch.p0(ptr addrspace(0) %cast, i32 0, i32 0, i32 1)
+  ret void
+}
+
+define void @prefetch_constant_to_flat(ptr addrspace(4) %const.ptr) {
+; CHECK-LABEL: define void @prefetch_constant_to_flat(
+; CHECK-SAME: ptr addrspace(4) [[CONST_PTR:%.*]]) {
+; CHECK-NEXT:    tail call void @llvm.prefetch.p4(ptr addrspace(4) [[CONST_PTR]], i32 0, i32 0, i32 1)
+; CHECK-NEXT:    ret void
+;
+  %cast = addrspacecast ptr addrspace(4) %const.ptr to ptr
+  tail call void @llvm.prefetch.p0(ptr %cast, i32 0, i32 0, i32 1)
+  ret void
+}
+
+define void @prefetch_flat_to_shared(ptr %flat.ptr) {
+; CHECK-LABEL: define void @prefetch_flat_to_shared(
+; CHECK-SAME: ptr [[FLAT_PTR:%.*]]) {
+; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast ptr [[FLAT_PTR]] to ptr addrspace(3)
+; CHECK-NEXT:    tail call void @llvm.prefetch.p3(ptr addrspace(3) [[CAST]], i32 0, i32 0, i32 1)
+; CHECK-NEXT:    ret void
+;
+  %cast = addrspacecast ptr %flat.ptr to ptr addrspace(3)
+  tail call void @llvm.prefetch.p3(ptr addrspace(3) %cast, i32 0, i32 0, i32 1)
+  ret void
+}
+
+define void @prefetch_flat_to_global(ptr %flat.ptr) {
+; CHECK-LABEL: define void @prefetch_flat_to_global(
+; CHECK-SAME: ptr [[FLAT_PTR:%.*]]) {
+; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast ptr [[FLAT_PTR]] to ptr addrspace(1)
+; CHECK-NEXT:    tail call void @llvm.prefetch.p1(ptr addrspace(1) [[CAST]], i32 0, i32 0, i32 1)
+; CHECK-NEXT:    ret void
+;
+  %cast = addrspacecast ptr %flat.ptr to ptr addrspace(1)
+  tail call void @llvm.prefetch.p1(ptr addrspace(1) %cast, i32 0, i32 0, i32 1)
+  ret void
+}
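
Note: because the collectRewritableIntrinsicOperands change pushes the prefetch pointer operand onto the pass's postorder stack, a cast that reaches the prefetch only through intermediate address arithmetic should also be inferred via the pass's usual flat-expression traversal. A minimal sketch of that composition, assuming the standard traversal applies; the function name is hypothetical and this case is not part of the patch's tests:

; Hypothetical input: the addrspacecast is separated from the prefetch by a GEP.
define void @prefetch_through_gep(ptr addrspace(3) %group.ptr) {
  %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr
  %gep = getelementptr i8, ptr %cast, i64 16
  tail call void @llvm.prefetch.p0(ptr %gep, i32 0, i32 0, i32 1)
  ret void
}
; Expected after -passes=infer-address-spaces (assumption): the GEP is cloned
; on the addrspace(3) pointer and the call is rewritten to @llvm.prefetch.p3:
;   %gep = getelementptr i8, ptr addrspace(3) %group.ptr, i64 16
;   tail call void @llvm.prefetch.p3(ptr addrspace(3) %gep, i32 0, i32 0, i32 1)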