diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index a84d703de0bbe..855edb2fb834d 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -337,7 +337,6 @@ void initializeSYCLLowerInvokeSimdLegacyPassPass(PassRegistry &); void initializeSYCLMutatePrintfAddrspaceLegacyPassPass(PassRegistry &); void initializeSPIRITTAnnotationsLegacyPassPass(PassRegistry &); void initializeESIMDLowerLoadStorePass(PassRegistry &); -void initializeESIMDLowerVecArgLegacyPassPass(PassRegistry &); void initializeESIMDVerifierPass(PassRegistry &); void initializeSYCLLowerWGLocalMemoryLegacyPass(PassRegistry &); void initializeTailCallElimPass(PassRegistry&); diff --git a/llvm/include/llvm/SYCLLowerIR/ESIMD/LowerESIMD.h b/llvm/include/llvm/SYCLLowerIR/ESIMD/LowerESIMD.h index d4d7ed348363e..dc904c3711ea9 100644 --- a/llvm/include/llvm/SYCLLowerIR/ESIMD/LowerESIMD.h +++ b/llvm/include/llvm/SYCLLowerIR/ESIMD/LowerESIMD.h @@ -52,20 +52,6 @@ class ESIMDLowerLoadStorePass : public PassInfoMixin { FunctionPass *createESIMDLowerLoadStorePass(); void initializeESIMDLowerLoadStorePass(PassRegistry &); -// Pass converts simd* function parameters and globals to -// llvm's first-class vector* type. -class ESIMDLowerVecArgPass : public PassInfoMixin { -public: - PreservedAnalyses run(Module &M, ModuleAnalysisManager &); - -private: - Function *rewriteFunc(Function &F); - Type *getSimdArgPtrTyOrNull(Value *arg); -}; - -ModulePass *createESIMDLowerVecArgPass(); -void initializeESIMDLowerVecArgLegacyPassPass(PassRegistry &); - // - Converts simd* function parameters and return values passed by pointer to // pass-by-value // (where possible) diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index b320fe62e8bb9..0039aaa34e19a 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -129,7 +129,6 @@ MODULE_PASS("memprof-module", ModuleMemProfilerPass()) MODULE_PASS("poison-checking", PoisonCheckingPass()) MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass()) MODULE_PASS("LowerESIMD", SYCLLowerESIMDPass()) -MODULE_PASS("ESIMDLowerVecArg", ESIMDLowerVecArgPass()) MODULE_PASS("esimd-opt-call-conv", ESIMDOptimizeVecArgCallConvPass()) MODULE_PASS("esimd-verifier", ESIMDVerifierPass()) MODULE_PASS("lower-invoke-simd", SYCLLowerInvokeSimdPass()) diff --git a/llvm/lib/SYCLLowerIR/CMakeLists.txt b/llvm/lib/SYCLLowerIR/CMakeLists.txt index 0a7fba1b2fa59..8214e9c372f04 100644 --- a/llvm/lib/SYCLLowerIR/CMakeLists.txt +++ b/llvm/lib/SYCLLowerIR/CMakeLists.txt @@ -54,7 +54,6 @@ add_llvm_component_library(LLVMSYCLLowerIR ESIMD/LowerESIMDKernelAttrs.cpp CompileTimePropertiesPass.cpp DeviceGlobals.cpp - ESIMD/LowerESIMDVecArg.cpp ESIMD/LowerESIMDVLoadVStore.cpp ESIMD/LowerESIMDSlmReservation.cpp HostPipes.cpp diff --git a/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMDVecArg.cpp b/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMDVecArg.cpp deleted file mode 100644 index 2a89091acadab..0000000000000 --- a/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMDVecArg.cpp +++ /dev/null @@ -1,216 +0,0 @@ -//===-- ESIMDVecArgPass.cpp - lower Close To Metal (CM) constructs --------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Change in function parameter type from simd* to native llvm vector type for -// cmc compiler to generate correct code for subroutine parameter passing: -// -// Old IR: -// ====== -// -// Parameter %0 is of type simd* -// define dso_local spir_func void @_Z3fooPiN2cm3gen4simdIiLi16EEE(i32 -// addrspace(4)* %C, -// "class.cm::gen::simd" * %0) -// local_unnamed_addr #2 { -// -// New IR: -// ====== -// -// Translate simd* parameter (#1) to vector <16 x 32>* type and insert bitcast. -// All users of old parameter will use result of the bitcast. -// -// define dso_local spir_func void @_Z3fooPiN2cm3gen4simdIiLi16EEE(i32 -// addrspace(4)* %C, -// <16 x i32>* %0) local_unnamed_addr #2 { -// entry: -// % 1 = bitcast<16 x i32> * % 0 to % -// "class.cm::gen::simd" * -// -// It is OK not to rewrite a function (for example, when its address is taken) -// since it does not affect correctness. But that may lead to vector backend -// not being able to hold the value in GRF and generate memory references. -// -//===----------------------------------------------------------------------===// - -#include "llvm/SYCLLowerIR/ESIMD/ESIMDUtils.h" -#include "llvm/SYCLLowerIR/ESIMD/LowerESIMD.h" - -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" - -using namespace llvm; - -#define DEBUG_TYPE "ESIMDLowerVecArg" - -namespace { -class ESIMDLowerVecArgLegacyPass : public ModulePass { -public: - static char ID; - ESIMDLowerVecArgLegacyPass() : ModulePass(ID) { - initializeESIMDLowerVecArgLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override { - ModuleAnalysisManager MAM; - Impl.run(M, MAM); - return true; - } - - bool doInitialization(Module &M) override { return false; } - -private: - ESIMDLowerVecArgPass Impl; -}; -} // namespace - -char ESIMDLowerVecArgLegacyPass::ID = 0; -INITIALIZE_PASS(ESIMDLowerVecArgLegacyPass, "ESIMDLowerVecArg", - "Translate simd ptr to native vector type", false, false) - -// Public interface to VecArgPass -ModulePass *llvm::createESIMDLowerVecArgPass() { - return new ESIMDLowerVecArgLegacyPass(); -} - -// Return ptr to first-class vector type if Value is a simd*, else return -// nullptr. -Type *ESIMDLowerVecArgPass::getSimdArgPtrTyOrNull(Value *arg) { - auto ArgType = dyn_cast(arg->getType()); - if (!ArgType) - return nullptr; - Type *Res = nullptr; - StructType *ST = - dyn_cast_or_null(ArgType->getNonOpaquePointerElementType()); - - Res = esimd::getVectorTyOrNull(ST); - if (!Res) - return nullptr; - - return PointerType::get(Res, ArgType->getPointerAddressSpace()); -} - -// F may have multiple arguments of type simd*. This -// function updates all parameters along with call -// call sites of F. -Function *ESIMDLowerVecArgPass::rewriteFunc(Function &F) { - FunctionType *FTy = F.getFunctionType(); - Type *RetTy = FTy->getReturnType(); - SmallVector ArgTys; - - for (unsigned int i = 0; i != F.arg_size(); i++) { - auto Arg = F.getArg(i); - Type *NewTy = getSimdArgPtrTyOrNull(Arg); - if (NewTy) { - // Copy over byval type for simd* type - ArgTys.push_back(NewTy); - } else { - // Transfer all non-simd ptr arguments - ArgTys.push_back(Arg->getType()); - } - } - - FunctionType *NFTy = FunctionType::get(RetTy, ArgTys, false); - - // Create new function body and insert into the module - Function *NF = Function::Create(NFTy, F.getLinkage(), F.getName()); - F.getParent()->getFunctionList().insert(F.getIterator(), NF); - - SmallVector Returns; - SmallVector BitCasts; - ValueToValueMapTy VMap; - for (unsigned int I = 0; I != F.arg_size(); I++) { - auto Arg = F.getArg(I); - Type *newTy = getSimdArgPtrTyOrNull(Arg); - if (newTy) { - // bitcast vector* -> simd* - auto BitCast = new BitCastInst(NF->getArg(I), Arg->getType()); - BitCasts.push_back(BitCast); - VMap.insert(std::make_pair(Arg, BitCast)); - continue; - } - VMap.insert(std::make_pair(Arg, NF->getArg(I))); - } - - llvm::CloneFunctionInto(NF, &F, VMap, - CloneFunctionChangeType::LocalChangesOnly, Returns); - - // insert bitcasts in new function only if its a definition - for (auto &B : BitCasts) { - if (!F.isDeclaration()) - B->insertBefore(NF->begin()->getFirstNonPHI()); - else - delete B; - } - - NF->takeName(&F); - - // Fix call sites - SmallVector, 10> OldNewInst; - for (auto &use : F.uses()) { - // Use must be a call site - SmallVector Params; - auto Call = cast(use.getUser()); - // Variadic functions not supported - assert(!Call->getFunction()->isVarArg() && - "Variadic functions not supported"); - for (unsigned int I = 0; I < Call->arg_size(); I++) { - auto SrcOpnd = Call->getOperand(I); - auto NewTy = getSimdArgPtrTyOrNull(SrcOpnd); - if (NewTy) { - auto BitCast = new BitCastInst(SrcOpnd, NewTy, "", Call); - Params.push_back(BitCast); - } else { - if (SrcOpnd != &F) - Params.push_back(SrcOpnd); - else - Params.push_back(NF); - } - } - // create new call instruction - auto NewCallInst = CallInst::Create(NFTy, NF, Params, ""); - NewCallInst->setCallingConv(F.getCallingConv()); - OldNewInst.push_back(std::make_pair(Call, NewCallInst)); - } - - for (auto &InstPair : OldNewInst) { - auto OldInst = InstPair.first; - auto NewInst = InstPair.second; - ReplaceInstWithInst(OldInst, NewInst); - } - - // Make sure to update any metadata as well - if(F.isUsedByMetadata()) { - // The old function is about to be destroyed, so - // just change its type so all replacement works. - F.mutateType(NF->getType()); - ValueAsMetadata::handleRAUW(&F, NF); - } - F.eraseFromParent(); - - return NF; -} - -PreservedAnalyses ESIMDLowerVecArgPass::run(Module &M, - ModuleAnalysisManager &) { - SmallVector functions; - for (auto &F : M) { - // Skip functions that are used through function pointers. - if (!F.hasAddressTaken()) - functions.push_back(&F); - } - - for (auto F : functions) { - for (unsigned int I = 0; I != F->arg_size(); I++) { - auto Arg = F->getArg(I); - if (getSimdArgPtrTyOrNull(Arg)) { - rewriteFunc(*F); - break; - } - } - } - return PreservedAnalyses::none(); -} diff --git a/llvm/test/SYCLLowerIR/ESIMD/global.ll b/llvm/test/SYCLLowerIR/ESIMD/global.ll deleted file mode 100644 index 274903e9325a1..0000000000000 --- a/llvm/test/SYCLLowerIR/ESIMD/global.ll +++ /dev/null @@ -1,166 +0,0 @@ -; This test checks whether globals are converted -; correctly to llvm's native vector type. -; -; RUN: opt < %s -passes=LowerESIMD,ESIMDLowerVecArg -S | FileCheck %s - -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -target triple = "spir64-unknown-unknown" - -%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" = type { <16 x i32> } - -$"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test" = comdat any - -; CHECK: [[NEWGLOBAL:[@a-zA-Z0-9_]*]] = dso_local global <16 x i32> zeroinitializer, align 64 #0 -@0 = dso_local global %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" zeroinitializer, align 64 #0 - -; Function Attrs: norecurse -define weak_odr dso_local spir_kernel void @"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test"(i32 addrspace(1)* %_arg_) local_unnamed_addr #1 comdat !kernel_arg_addr_space !8 !kernel_arg_access_qual !9 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !11 !sycl_explicit_simd !12 !intel_reqd_sub_group_size !8 { -entry: - %vc.i = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", align 64 - %agg.tmp.i = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", align 64 - %call.esimd.i.i.i.i.i = call <3 x i32> @llvm.genx.local.id.v3i32() #5 - %local_id.y.i.i.i.i.i = extractelement <3 x i32> %call.esimd.i.i.i.i.i, i32 1 - %local_id.y.cast.ty.i.i.i.i.i = zext i32 %local_id.y.i.i.i.i.i to i64 - %call.esimd1.i.i.i.i.i = call <3 x i32> @llvm.genx.local.size.v3i32() #5 - %wgsize.y.i.i.i.i.i = extractelement <3 x i32> %call.esimd1.i.i.i.i.i, i32 1 - %wgsize.y.cast.ty.i.i.i.i.i = zext i32 %wgsize.y.i.i.i.i.i to i64 - %group.id.y.i.i.i.i.i = call i32 @llvm.genx.group.id.y() #5 - %group.id.y.cast.ty.i.i.i.i.i = zext i32 %group.id.y.i.i.i.i.i to i64 - %mul.i.i.i.i.i = mul nuw i64 %wgsize.y.cast.ty.i.i.i.i.i, %group.id.y.cast.ty.i.i.i.i.i - %add.i.i.i.i.i = add i64 %mul.i.i.i.i.i, %local_id.y.cast.ty.i.i.i.i.i - %local_id.x.i.i.i.i.i = extractelement <3 x i32> %call.esimd.i.i.i.i.i, i32 0 - %local_id.x.cast.ty.i.i.i.i.i = zext i32 %local_id.x.i.i.i.i.i to i64 - %wgsize.x.i.i.i.i.i = extractelement <3 x i32> %call.esimd1.i.i.i.i.i, i32 0 - %wgsize.x.cast.ty.i.i.i.i.i = zext i32 %wgsize.x.i.i.i.i.i to i64 - %group.id.x.i.i.i.i.i = call i32 @llvm.genx.group.id.x() #5 - %group.id.x.cast.ty.i.i.i.i.i = zext i32 %group.id.x.i.i.i.i.i to i64 - %mul.i4.i.i.i.i = mul nuw i64 %group.id.x.cast.ty.i.i.i.i.i, %wgsize.x.cast.ty.i.i.i.i.i - %add.i5.i.i.i.i = add i64 %mul.i4.i.i.i.i, %local_id.x.cast.ty.i.i.i.i.i - %0 = bitcast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %agg.tmp.i to i8* - call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %0) - %1 = bitcast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %vc.i to i8* - call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %1) #5 - %conv.i = trunc i64 %add.i5.i.i.i.i to i32 - %2 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %vc.i to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* - %splat.splatinsert.i.i = insertelement <16 x i32> undef, i32 %conv.i, i32 0 - %splat.splat.i.i = shufflevector <16 x i32> %splat.splatinsert.i.i, <16 x i32> undef, <16 x i32> zeroinitializer - %M_data.i13.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* %2, i64 0, i32 0 - store <16 x i32> %splat.splat.i.i, <16 x i32> addrspace(4)* %M_data.i13.i, align 64, !tbaa !13 - %conv3.i = trunc i64 %add.i.i.i.i.i to i32 - %splat.splatinsert.i20.i = insertelement <8 x i32> undef, i32 %conv3.i, i32 0 - %splat.splat.i21.i = shufflevector <8 x i32> %splat.splatinsert.i20.i, <8 x i32> undef, <8 x i32> zeroinitializer - %call.esimd.i.i.i.i.i2 = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* %M_data.i13.i) #5 - %call4.esimd.i.i.i.i = call <16 x i32> @llvm.genx.wrregioni.v16i32.v8i32.i16.v8i1(<16 x i32> %call.esimd.i.i.i.i.i2, <8 x i32> %splat.splat.i21.i, i32 0, i32 8, i32 1, i16 0, i32 0, <8 x i1> ) #5 - call void @llvm.genx.vstore.v16i32.p4v16i32(<16 x i32> %call4.esimd.i.i.i.i, <16 x i32> addrspace(4)* %M_data.i13.i) #5 - %cmp.i = icmp eq i64 %add.i.i.i.i.i, 0 - %..i = select i1 %cmp.i, i64 %add.i5.i.i.i.i, i64 %add.i.i.i.i.i - %conv9.i = trunc i64 %..i to i32 -; CHECK: store <16 x i32> , <16 x i32> addrspace(4)* addrspacecast (<16 x i32>* getelementptr inbounds ({{.+}}, {{.+}}* bitcast (<16 x i32>* [[NEWGLOBAL]] to {{.+}}*), i64 0, i32 0) to <16 x i32> addrspace(4)*), align 64, !tbaa.struct !16 - store <16 x i32> , <16 x i32> addrspace(4)* addrspacecast (<16 x i32>* getelementptr inbounds (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* @0, i64 0, i32 0) to <16 x i32> addrspace(4)*), align 64, !tbaa.struct !16 - %mul.i = shl nsw i32 %conv9.i, 4 - %idx.ext.i = sext i32 %mul.i to i64 - %add.ptr.i16 = getelementptr inbounds i32, i32 addrspace(1)* %_arg_, i64 %idx.ext.i - %add.ptr.i = addrspacecast i32 addrspace(1)* %add.ptr.i16 to i32 addrspace(4)* - %3 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %agg.tmp.i to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* - %call.esimd.i.i.i = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* %M_data.i13.i) #5 - %M_data.i2.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* %3, i64 0, i32 0 - call void @llvm.genx.vstore.v16i32.p4v16i32(<16 x i32> %call.esimd.i.i.i, <16 x i32> addrspace(4)* %M_data.i2.i.i) #5 - call spir_func void @_Z3fooPiN2cl4sycl5INTEL3gpu4simdIiLi16EEE(i32 addrspace(4)* %add.ptr.i, %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* nonnull %agg.tmp.i) #5 - store <16 x i32> , <16 x i32> addrspace(4)* addrspacecast (<16 x i32>* getelementptr inbounds (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* @0, i64 0, i32 0) to <16 x i32> addrspace(4)*), align 64, !tbaa.struct !16 - call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %1) #5 - call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %0) - ret void -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.lifetime.start.p0i8(i64 immarg %0, i8* nocapture %1) #2 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.lifetime.end.p0i8(i64 immarg %0, i8* nocapture %1) #2 - -; Function Attrs: noinline norecurse nounwind -define dso_local spir_func void @_Z3fooPiN2cl4sycl5INTEL3gpu4simdIiLi16EEE(i32 addrspace(4)* %C, %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %v) local_unnamed_addr #3 { -entry: - %agg.tmp = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", align 64 - %0 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %v to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* - %1 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* %agg.tmp to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* - %M_data.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* %0, i64 0, i32 0 - %call.esimd.i.i = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* %M_data.i.i), !noalias !17 -; CHECK: {{.+}} = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* getelementptr ({{.+}}, {{.+}} addrspace(4)* addrspacecast ({{.+}}* bitcast (<16 x i32>* [[NEWGLOBAL]] to {{.+}}*) to {{.+}} addrspace(4)*), i64 0, i32 0)), !noalias !17 - %call.esimd.i8.i = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* getelementptr (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* addrspacecast (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd"* @0 to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)*), i64 0, i32 0)), !noalias !17 - %add.i = add <16 x i32> %call.esimd.i8.i, %call.esimd.i.i - %M_data.i.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd", %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* %1, i64 0, i32 0 - call void @llvm.genx.vstore.v16i32.p4v16i32(<16 x i32> %add.i, <16 x i32> addrspace(4)* %M_data.i.i.i) - %2 = ptrtoint i32 addrspace(4)* %C to i64 - %call.esimd.i.i2 = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* %M_data.i.i.i) - call void @llvm.genx.svm.block.st.v16i32(i64 %2, <16 x i32> %call.esimd.i.i2) - ret void -} - -; Function Attrs: nounwind readnone -declare !genx_intrinsic_id !20 <16 x i32> @llvm.genx.wrregioni.v16i32.v8i32.i16.v8i1(<16 x i32> %0, <8 x i32> %1, i32 %2, i32 %3, i32 %4, i16 %5, i32 %6, <8 x i1> %7) #4 - -; Function Attrs: nounwind -declare !genx_intrinsic_id !21 <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* %0) #5 - -; Function Attrs: nounwind -declare !genx_intrinsic_id !22 void @llvm.genx.vstore.v16i32.p4v16i32(<16 x i32> %0, <16 x i32> addrspace(4)* %1) #5 - -; Function Attrs: nounwind -declare !genx_intrinsic_id !23 void @llvm.genx.svm.block.st.v16i32(i64 %0, <16 x i32> %1) #5 - -; Function Attrs: nounwind readnone -declare !genx_intrinsic_id !24 <3 x i32> @llvm.genx.local.id.v3i32() #4 - -; Function Attrs: nounwind readnone -declare !genx_intrinsic_id !25 <3 x i32> @llvm.genx.local.size.v3i32() #4 - -; Function Attrs: nounwind readnone -declare !genx_intrinsic_id !26 i32 @llvm.genx.group.id.y() #4 - -; Function Attrs: nounwind readnone -declare !genx_intrinsic_id !27 i32 @llvm.genx.group.id.x() #4 - -attributes #0 = { "genx_byte_offset"="192" "genx_volatile" } -attributes #1 = { norecurse "CMGenxMain" "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="512" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "oclrt"="1" "stack-protector-buffer-size"="8" "sycl-module-id"="subroutine.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { argmemonly nounwind willreturn } -attributes #3 = { noinline norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="512" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #4 = { nounwind readnone } -attributes #5 = { nounwind } - -!llvm.dependent-libraries = !{!0} -!llvm.module.flags = !{!1} -!opencl.spir.version = !{!2} -!spirv.Source = !{!3} -!llvm.ident = !{!4} -!genx.kernels = !{!5} - -!0 = !{!"libcpmt"} -!1 = !{i32 1, !"wchar_size", i32 2} -!2 = !{i32 1, i32 2} -!3 = !{i32 6, i32 100000} -!4 = !{!"clang version 11.0.0"} -!5 = !{void (i32 addrspace(1)*)* @"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test", !"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test", !6, i32 0, i32 0, !6, !7, i32 0, i32 0} -!6 = !{i32 0} -!7 = !{!"svmptr_t"} -!8 = !{i32 1} -!9 = !{!"none"} -!10 = !{!"int*"} -!11 = !{!""} -!12 = !{} -!13 = !{!14, !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C++ TBAA"} -!16 = !{i64 0, i64 64, !13} -!17 = !{!18} -!18 = distinct !{!18, !19, !"_ZNK2cl4sycl5INTEL3gpu4simdIiLi16EEplERKS4_: %agg.result"} -!19 = distinct !{!19, !"_ZNK2cl4sycl5INTEL3gpu4simdIiLi16EEplERKS4_"} -!20 = !{i32 8275} -!21 = !{i32 8268} -!22 = !{i32 8269} -!23 = !{i32 8166} -!24 = !{i32 8029} -!25 = !{i32 8034} -!26 = !{i32 8020} -!27 = !{i32 8019} - diff --git a/llvm/test/SYCLLowerIR/ESIMD/global_crash.ll b/llvm/test/SYCLLowerIR/ESIMD/global_crash.ll deleted file mode 100644 index cabf30983661e..0000000000000 --- a/llvm/test/SYCLLowerIR/ESIMD/global_crash.ll +++ /dev/null @@ -1,26 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=LowerESIMD,ESIMDLowerVecArg -S | FileCheck %s - -; This test checks that there is no compiler crash when a Global -; is used in simple instruction, not directly in ConstantExpr. - -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -target triple = "spir64-unknown-unknown" - -%"class.cl::sycl::INTEL::gpu::simd" = type { <2512 x i32> } - -; CHECK: @Global = dso_local global <2512 x i32> undef, align 16384 -@Global = dso_local global %"class.cl::sycl::INTEL::gpu::simd" undef, align 16384 - -define void @no_crash(<2512 x i32> %simd_val) { -; CHECK-LABEL: @no_crash( -; CHECK-NEXT: [[CAST:%.*]] = addrspacecast %"class.cl::sycl::INTEL::gpu::simd"* bitcast (<2512 x i32>* @Global to %"class.cl::sycl::INTEL::gpu::simd"*) to %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)* -; CHECK-NEXT: [[GEP:%.*]] = getelementptr %"class.cl::sycl::INTEL::gpu::simd", %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)* [[CAST]], i64 0, i32 0 -; CHECK-NEXT: store <2512 x i32> [[SIMD_VAL:%.*]], <2512 x i32> addrspace(4)* [[GEP]], align 16384 -; CHECK-NEXT: ret void -; - %cast = addrspacecast %"class.cl::sycl::INTEL::gpu::simd"* @Global to %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)* - %gep = getelementptr %"class.cl::sycl::INTEL::gpu::simd", %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)* %cast, i64 0, i32 0 - store <2512 x i32> %simd_val, <2512 x i32> addrspace(4)* %gep, align 16384 - ret void -} diff --git a/llvm/test/SYCLLowerIR/ESIMD/global_undef.ll b/llvm/test/SYCLLowerIR/ESIMD/global_undef.ll deleted file mode 100644 index 4924dd715d8f1..0000000000000 --- a/llvm/test/SYCLLowerIR/ESIMD/global_undef.ll +++ /dev/null @@ -1,22 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=LowerESIMD,ESIMDLowerVecArg -S | FileCheck %s - -; This test checks that undef initializer of a global variable is preserved -; during ESIMDLowerVecArg transformation - -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -target triple = "spir64-unknown-unknown" - -%"class.cl::sycl::INTEL::gpu::simd" = type { <2512 x i32> } - -; CHECK: @Global = dso_local global <2512 x i32> undef, align 16384 -@Global = dso_local global %"class.cl::sycl::INTEL::gpu::simd" undef, align 16384 - -define void @f(<2512 x i32> %simd_val) { -; CHECK-LABEL: @f( -; CHECK-NEXT: store <2512 x i32> [[SIMD_VAL:%.*]], <2512 x i32> addrspace(4)* getelementptr (%"class.cl::sycl::INTEL::gpu::simd", %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)* addrspacecast (%"class.cl::sycl::INTEL::gpu::simd"* bitcast (<2512 x i32>* @Global to %"class.cl::sycl::INTEL::gpu::simd"*) to %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)*), i64 0, i32 0), align 16384 -; CHECK-NEXT: ret void -; - store <2512 x i32> %simd_val, <2512 x i32> addrspace(4)* getelementptr (%"class.cl::sycl::INTEL::gpu::simd", %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)* addrspacecast (%"class.cl::sycl::INTEL::gpu::simd"* @Global to %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)*), i64 0, i32 0), align 16384 - ret void -} diff --git a/llvm/test/SYCLLowerIR/ESIMD/lower_vec_arg_fp.ll b/llvm/test/SYCLLowerIR/ESIMD/lower_vec_arg_fp.ll deleted file mode 100644 index fcb6fe869d5fa..0000000000000 --- a/llvm/test/SYCLLowerIR/ESIMD/lower_vec_arg_fp.ll +++ /dev/null @@ -1,59 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=ESIMDLowerVecArg -S | FileCheck %s - -; This test checks that there is no crash in ESIMDLowerVecArg pass when -; rewriting funcitons that are used through a function pointer. - -%"cl::sycl::INTEL::gpu::simd" = type { <64 x i32> } - -define dso_local spir_func void @func(%"cl::sycl::INTEL::gpu::simd"* %arg) { -; CHECK-LABEL: @func( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret void -; -entry: - ret void -} - -define dso_local spir_func void @init_ptr(void (%"cl::sycl::INTEL::gpu::simd"*)** %foo) !sycl_explicit_simd !1 { -; CHECK-LABEL: @init_ptr( -; CHECK-NEXT: entry: -; CHECK-NEXT: store void (%"cl::sycl::INTEL::gpu::simd"*)* @func, void (%"cl::sycl::INTEL::gpu::simd"*)** [[FOO:%.*]], align 8 -; CHECK-NEXT: ret void -; -entry: - store void (%"cl::sycl::INTEL::gpu::simd"*)* @func, void (%"cl::sycl::INTEL::gpu::simd"*)** %foo - ret void -} - -define dso_local spir_func void @use_ptr(void (%"cl::sycl::INTEL::gpu::simd"*)* %foo) !sycl_explicit_simd !1 { -; CHECK-LABEL: @use_ptr( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[AGG_TMP:%.*]] = alloca %"cl::sycl::INTEL::gpu::simd", align 256 -; CHECK-NEXT: call spir_func void [[FOO:%.*]](%"cl::sycl::INTEL::gpu::simd"* [[AGG_TMP]]) -; CHECK-NEXT: ret void -; -entry: - %agg.tmp = alloca %"cl::sycl::INTEL::gpu::simd" - call spir_func void %foo(%"cl::sycl::INTEL::gpu::simd"* %agg.tmp) - ret void -} - -define dso_local spir_func void @esimd_kernel() !sycl_explicit_simd !1 { -; CHECK-LABEL: @esimd_kernel( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[FP:%.*]] = alloca void (%"cl::sycl::INTEL::gpu::simd"*)*, align 8 -; CHECK-NEXT: call spir_func void @init_ptr(void (%"cl::sycl::INTEL::gpu::simd"*)** [[FP]]) -; CHECK-NEXT: [[TMP0:%.*]] = load void (%"cl::sycl::INTEL::gpu::simd"*)*, void (%"cl::sycl::INTEL::gpu::simd"*)** [[FP]], align 8 -; CHECK-NEXT: call spir_func void @use_ptr(void (%"cl::sycl::INTEL::gpu::simd"*)* [[TMP0]]) -; CHECK-NEXT: ret void -; -entry: - %fp = alloca void (%"cl::sycl::INTEL::gpu::simd"*)* - call spir_func void @init_ptr(void (%"cl::sycl::INTEL::gpu::simd"*)** %fp) - %0 = load void (%"cl::sycl::INTEL::gpu::simd"*)*, void (%"cl::sycl::INTEL::gpu::simd"*)** %fp - call spir_func void @use_ptr(void (%"cl::sycl::INTEL::gpu::simd"*)* %0) - ret void -} - -!1 = !{} diff --git a/llvm/test/SYCLLowerIR/ESIMD/lower_vec_arg_fp_metadata.ll b/llvm/test/SYCLLowerIR/ESIMD/lower_vec_arg_fp_metadata.ll deleted file mode 100644 index d67f05736c196..0000000000000 --- a/llvm/test/SYCLLowerIR/ESIMD/lower_vec_arg_fp_metadata.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: opt < %s -passes=ESIMDLowerVecArg -S | FileCheck %s - -; Check that we correctly update metadata to reference the new function - -%"class.sycl::_V1::vec" = type { <2 x double> } - -$foo = comdat any - -define weak_odr dso_local spir_kernel void @foo(%"class.sycl::_V1::vec" addrspace(1)* noundef align 16 %_arg_out) local_unnamed_addr comdat { -entry: - ret void -} - -;CHECK: !genx.kernels = !{![[GenXMD:[0-9]+]]} -!genx.kernels = !{!0} - -;CHECK: ![[GenXMD]] = !{void (<2 x double> addrspace(1)*)* @foo, {{.*}}} -!0 = !{void (%"class.sycl::_V1::vec" addrspace(1)*)* @foo, !"foo", !1, i32 0, i32 0, !1, !2, i32 0, i32 0} -!1 = !{i32 0} -!2 = !{!"svmptr_t"} diff --git a/llvm/test/SYCLLowerIR/ESIMD/subroutine.ll b/llvm/test/SYCLLowerIR/ESIMD/subroutine.ll deleted file mode 100644 index 100ea3a3b2f8b..0000000000000 --- a/llvm/test/SYCLLowerIR/ESIMD/subroutine.ll +++ /dev/null @@ -1,107 +0,0 @@ -; This test checks whether subroutine arguments are converted -; correctly to llvm's native vector type. -; -; RUN: opt < %s -passes=ESIMDLowerVecArg -S | FileCheck %s - -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -target triple = "spir64-unknown-unknown" - -%class._ZTS4simdIiLi16EE.simd = type { <16 x i32> } - -$_ZN4simdIiLi16EEC1ERS0_ = comdat any - -$_ZN4simdIiLi16EEC2ERS0_ = comdat any - -; Function Attrs: norecurse nounwind -define spir_func void @_Z3fooi(i32 %x) #0 { -entry: - %x.addr = alloca i32, align 4 -; CHECK: {{.+}} = alloca {{.+}} -; CHECK-NEXT: [[A:%[a-zA-Z0-9_]*]] = alloca {{.+}} - %a = alloca %class._ZTS4simdIiLi16EE.simd, align 64 - %agg.tmp = alloca %class._ZTS4simdIiLi16EE.simd, align 64 - store i32 %x, i32* %x.addr, align 4, !tbaa !4 - %0 = bitcast %class._ZTS4simdIiLi16EE.simd* %a to i8* - call void @llvm.lifetime.start.p0i8(i64 64, i8* %0) #2 -; CHECK: [[ADDRSPCAST1:%[a-zA-Z0-9_]*]] = addrspacecast {{.+}} [[A]] to {{.+}} - %1 = addrspacecast %class._ZTS4simdIiLi16EE.simd* %agg.tmp to %class._ZTS4simdIiLi16EE.simd addrspace(4)* - %2 = addrspacecast %class._ZTS4simdIiLi16EE.simd* %a to %class._ZTS4simdIiLi16EE.simd addrspace(4)* -; CHECK: [[BITCASTRESULT1:%[a-zA-Z0-9_]*]] = bitcast {{.+}} addrspace(4)* [[ADDRSPCAST1]] to <16 x i32> addrspace(4)* -; CHECK-NEXT: call spir_func void @_ZN4simdIiLi16EEC1ERS0_(<16 x i32> addrspace(4)* {{.+}}, <16 x i32> addrspace(4)* [[BITCASTRESULT1]]) - call spir_func void @_ZN4simdIiLi16EEC1ERS0_(%class._ZTS4simdIiLi16EE.simd addrspace(4)* %1, %class._ZTS4simdIiLi16EE.simd addrspace(4)* align 64 dereferenceable(64) %2) -; CHECK: [[BITCASTRESULT2:%[a-zA-Z0-9_]*]] = bitcast {{.+}} to <16 x i32>* -; CHECK-NEXT: {{.+}} = call spir_func i32 {{.+}}bar{{.+}}(<16 x i32>* [[BITCASTRESULT2]]) - %call = call spir_func i32 @_Z3bar4simdIiLi16EE(%class._ZTS4simdIiLi16EE.simd* %agg.tmp) - %3 = bitcast %class._ZTS4simdIiLi16EE.simd* %a to i8* - call void @llvm.lifetime.end.p0i8(i64 64, i8* %3) #2 - ret void -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 - -; Function Attrs: norecurse nounwind -; CHECK: define spir_func i32 @_Z3bar4simdIiLi16EE(<16 x i32>* {{.+}} -define spir_func i32 @_Z3bar4simdIiLi16EE(%class._ZTS4simdIiLi16EE.simd* %v) #0 { -entry: -; CHECK: {{.+}} = bitcast <16 x i32>* {{.+}} - ret i32 1 -} - -; Function Attrs: norecurse nounwind -; CHECK: define linkonce_odr spir_func void @_ZN4simdIiLi16EEC1ERS0_(<16 x i32> addrspace(4)* [[OLDARG0:%[a-zA-Z0-9_]*]], <16 x i32> addrspace(4)*{{.*}} [[OLDARG1:%[a-zA-Z0-9_]*]]) unnamed_addr {{.+}} -define linkonce_odr spir_func void @_ZN4simdIiLi16EEC1ERS0_(%class._ZTS4simdIiLi16EE.simd addrspace(4)* %this, %class._ZTS4simdIiLi16EE.simd addrspace(4)* align 64 dereferenceable(64) %other) unnamed_addr #0 comdat align 2 { -entry: -; CHECK: [[NEWARG1:%[a-zA-Z0-9_]*]] = bitcast <16 x i32> addrspace(4)* [[OLDARG1]] to {{.+}} -; CHECK-NEXT: [[NEWARG0:%[a-zA-Z0-9_]*]] = bitcast <16 x i32> addrspace(4)* [[OLDARG0]] to {{.+}} - %this.addr = alloca %class._ZTS4simdIiLi16EE.simd addrspace(4)*, align 8 - %other.addr = alloca %class._ZTS4simdIiLi16EE.simd addrspace(4)*, align 8 -; CHECK: store {{.+}} addrspace(4)* [[NEWARG0]], {{.+}} - store %class._ZTS4simdIiLi16EE.simd addrspace(4)* %this, %class._ZTS4simdIiLi16EE.simd addrspace(4)** %this.addr, align 8, !tbaa !8 -; CHECK-NEXT: store {{.+}} addrspace(4)* [[NEWARG1]], {{.+}} - store %class._ZTS4simdIiLi16EE.simd addrspace(4)* %other, %class._ZTS4simdIiLi16EE.simd addrspace(4)** %other.addr, align 8, !tbaa !8 - %this1 = load %class._ZTS4simdIiLi16EE.simd addrspace(4)*, %class._ZTS4simdIiLi16EE.simd addrspace(4)** %this.addr, align 8 - %0 = load %class._ZTS4simdIiLi16EE.simd addrspace(4)*, %class._ZTS4simdIiLi16EE.simd addrspace(4)** %other.addr, align 8 - call spir_func void @_ZN4simdIiLi16EEC2ERS0_(%class._ZTS4simdIiLi16EE.simd addrspace(4)* %this1, %class._ZTS4simdIiLi16EE.simd addrspace(4)* align 64 dereferenceable(64) %0) - ret void -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 - -; Function Attrs: norecurse nounwind -define linkonce_odr spir_func void @_ZN4simdIiLi16EEC2ERS0_(%class._ZTS4simdIiLi16EE.simd addrspace(4)* %this, %class._ZTS4simdIiLi16EE.simd addrspace(4)* align 64 dereferenceable(64) %other) unnamed_addr #0 comdat align 2 { -entry: - %this.addr = alloca %class._ZTS4simdIiLi16EE.simd addrspace(4)*, align 8 - %other.addr = alloca %class._ZTS4simdIiLi16EE.simd addrspace(4)*, align 8 - store %class._ZTS4simdIiLi16EE.simd addrspace(4)* %this, %class._ZTS4simdIiLi16EE.simd addrspace(4)** %this.addr, align 8, !tbaa !8 - store %class._ZTS4simdIiLi16EE.simd addrspace(4)* %other, %class._ZTS4simdIiLi16EE.simd addrspace(4)** %other.addr, align 8, !tbaa !8 - %this1 = load %class._ZTS4simdIiLi16EE.simd addrspace(4)*, %class._ZTS4simdIiLi16EE.simd addrspace(4)** %this.addr, align 8 - %0 = load %class._ZTS4simdIiLi16EE.simd addrspace(4)*, %class._ZTS4simdIiLi16EE.simd addrspace(4)** %other.addr, align 8, !tbaa !8 - %__M_data = getelementptr inbounds %class._ZTS4simdIiLi16EE.simd, %class._ZTS4simdIiLi16EE.simd addrspace(4)* %0, i32 0, i32 0 - %1 = load <16 x i32>, <16 x i32> addrspace(4)* %__M_data, align 64, !tbaa !10 - %__M_data2 = getelementptr inbounds %class._ZTS4simdIiLi16EE.simd, %class._ZTS4simdIiLi16EE.simd addrspace(4)* %this1, i32 0, i32 0 - store <16 x i32> %1, <16 x i32> addrspace(4)* %__M_data2, align 64, !tbaa !10 - ret void -} - -attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { argmemonly nounwind willreturn } -attributes #2 = { nounwind } - -!llvm.module.flags = !{!0} -!opencl.spir.version = !{!1} -!spirv.Source = !{!2} -!llvm.ident = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 1, i32 2} -!2 = !{i32 4, i32 100000} -!3 = !{!"clang version 11.0.0 (https://github.com/kbobrovs/llvm.git fb752d6351dc6785f5438b137a86fa39a3493225)"} -!4 = !{!5, !5, i64 0} -!5 = !{!"int", !6, i64 0} -!6 = !{!"omnipotent char", !7, i64 0} -!7 = !{!"Simple C++ TBAA"} -!8 = !{!9, !9, i64 0} -!9 = !{!"any pointer", !6, i64 0} -!10 = !{!6, !6, i64 0} diff --git a/llvm/test/SYCLLowerIR/ESIMD/subroutine_extern.ll b/llvm/test/SYCLLowerIR/ESIMD/subroutine_extern.ll deleted file mode 100644 index 376be5384782a..0000000000000 --- a/llvm/test/SYCLLowerIR/ESIMD/subroutine_extern.ll +++ /dev/null @@ -1,39 +0,0 @@ -; This test checks whether subroutine arguments are converted -; correctly to llvm's native vector type when callee is an extern function. -; -; RUN: opt < %s -passes=ESIMDLowerVecArg -S | FileCheck %s - -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -target triple = "spir64-unknown-unknown" - -%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd" = type { <16 x float> } - -$"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test" = comdat any - -@_ZL2VL = internal unnamed_addr addrspace(1) constant i32 16, align 4 - -; Function Attrs: convergent norecurse -define weak_odr dso_local spir_kernel void @"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test"() { -entry: - %0 = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd", align 64 - %agg.tmp5.i = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd", align 64 - %agg.tmp6.i = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd", align 64 - %1 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd"* %0 to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* - -; CHECK: [[BITCASTRESULT1:%[a-zA-Z0-9_]*]] = bitcast {{.+}} addrspace(4)* %1 to <16 x float> addrspace(4)* -; CHECK: [[BITCASTRESULT2:%[a-zA-Z0-9_]*]] = bitcast {{.+}} %agg.tmp5.i to <16 x float>* -; CHECK: [[BITCASTRESULT3:%[a-zA-Z0-9_]*]] = bitcast {{.+}} %agg.tmp6.i to <16 x float>* -; CHECK-NEXT: call spir_func void @_Z4vaddN2cl4sycl5INTEL3gpu4simdIfLi16EEES4_(<16 x float> addrspace(4)* [[BITCASTRESULT1]], <16 x float>* [[BITCASTRESULT2]], <16 x float>* [[BITCASTRESULT3]]) - - call spir_func void @_Z4vaddN2cl4sycl5INTEL3gpu4simdIfLi16EEES4_(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* sret(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %1, %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd"* nonnull byval(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %agg.tmp5.i, %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd"* nonnull byval(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %agg.tmp6.i) #1 - ret void -} - -; CHECK: declare dso_local spir_func void @_Z4vaddN2cl4sycl5INTEL3gpu4simdIfLi16EEES4_(<16 x float> addrspace(4)*, <16 x float>*, <16 x float>*){{.+}} -; Function Attrs: convergent -declare dso_local spir_func void @_Z4vaddN2cl4sycl5INTEL3gpu4simdIfLi16EEES4_(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* sret(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %0, %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd"* byval(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %1, %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd"* byval(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %2) local_unnamed_addr #2 - -attributes #0 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { convergent } - - diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index 2c3f4029d83d4..69a42459b0207 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -459,7 +459,6 @@ int main(int argc, char **argv) { initializeSYCLLowerInvokeSimdLegacyPassPass(Registry); initializeSPIRITTAnnotationsLegacyPassPass(Registry); initializeESIMDLowerLoadStorePass(Registry); - initializeESIMDLowerVecArgLegacyPassPass(Registry); initializeESIMDVerifierPass(Registry); initializeSYCLLowerWGLocalMemoryLegacyPass(Registry); initializeSYCLMutatePrintfAddrspaceLegacyPassPass(Registry); diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index ac82c0ef91d16..3d3bf3d71d4c2 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -605,11 +605,7 @@ bool lowerEsimdConstructs(module_split::ModuleDesc &MD) { FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } - if (!MD.getModule().getContext().supportsTypedPointers()) { - MPM.addPass(ESIMDOptimizeVecArgCallConvPass{}); - } else { - MPM.addPass(ESIMDLowerVecArgPass{}); - } + MPM.addPass(ESIMDOptimizeVecArgCallConvPass{}); FunctionPassManager MainFPM; MainFPM.addPass(ESIMDLowerLoadStorePass{}); diff --git a/sycl/test/esimd/ctor_codegen.cpp b/sycl/test/esimd/ctor_codegen.cpp index baa1920ac8818..77d5f948fc031 100644 --- a/sycl/test/esimd/ctor_codegen.cpp +++ b/sycl/test/esimd/ctor_codegen.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -Xclang -no-opaque-pointers -fsycl -fsycl-device-only -S %s -o - | FileCheck %s +// RUN: %clangxx -fsycl -fsycl-device-only -S %s -o - | FileCheck %s // Check efficiency of LLVM IR generated for various simd constructors. @@ -19,8 +19,7 @@ SYCL_EXTERNAL auto foo(double i) SYCL_ESIMD_FUNCTION { return val; // CHECK: %[[V0:[a-zA-Z0-9_\.]+]] = insertelement <2 x double> undef, double %[[I]], i64 0 // CHECK-NEXT: %[[V1:[a-zA-Z0-9_\.]+]] = shufflevector <2 x double> %[[V0]], <2 x double> poison, <2 x i32> zeroinitializer -// CHECK-NEXT: %[[MDATA:[a-zA-Z0-9_\.]+]] = getelementptr inbounds {{.*}} %[[RES]], i64 0, i32 0, i32 0 -// CHECK-NEXT: store <2 x double> %[[V1]], <2 x double> addrspace(4)* %[[MDATA]] +// CHECK-NEXT: store <2 x double> %[[V1]], ptr addrspace(4) %[[RES]] // CHECK-NEXT: ret void // CHECK-NEXT: } } @@ -36,8 +35,7 @@ SYCL_EXTERNAL auto baz() SYCL_ESIMD_FUNCTION { // CHECK: define dso_local spir_func void @_Z3bazv({{.*}} %[[RES:[a-zA-Z0-9_\.]+]]){{.*}} { simd val(17, 3); return val; - // CHECK: %[[MDATA:[a-zA-Z0-9_\.]+]] = getelementptr inbounds {{.*}} %[[RES]], i64 0, i32 0, i32 0 - // CHECK-NEXT: store <2 x i32> , <2 x i32> addrspace(4)* %[[MDATA]] + // CHECK: store <2 x i32> , ptr addrspace(4) %[[RES]] // CHECK-NEXT: ret void // CHECK-NEXT: } } @@ -47,8 +45,7 @@ SYCL_EXTERNAL auto gee() SYCL_ESIMD_FUNCTION { // CHECK: define dso_local spir_func void @_Z3geev({{.*}} %[[RES:[a-zA-Z0-9_\.]+]]){{.*}} { simd val(-7); return val; -// CHECK: %[[MDATA:[a-zA-Z0-9_\.]+]] = getelementptr inbounds {{.*}} %[[RES]], i64 0, i32 0, i32 0 -// CHECK-NEXT: store <2 x float> , <2 x float> addrspace(4)* %[[MDATA]] +// CHECK: store <2 x float> , ptr addrspace(4) %[[RES]] // CHECK-NEXT: ret void // CHECK-NEXT: } } @@ -58,8 +55,7 @@ SYCL_EXTERNAL auto foomask() SYCL_ESIMD_FUNCTION { // CHECK: define dso_local spir_func void @_Z7foomaskv({{.*}} %[[RES:[a-zA-Z0-9_\.]+]]){{.*}} { simd_mask<2> val({ 1, 0 }); return val; -// CHECK: %[[MDATA:[a-zA-Z0-9_\.]+]] = getelementptr inbounds {{.*}} %[[RES]], i64 0, i32 0, i32 0 -// CHECK-NEXT: store <2 x i16> , <2 x i16> addrspace(4)* %[[MDATA]] +// CHECK: store <2 x i16> , ptr addrspace(4) %[[RES]] // CHECK-NEXT: ret void // CHECK-NEXT: } } @@ -69,8 +65,7 @@ SYCL_EXTERNAL auto geemask() SYCL_ESIMD_FUNCTION { // CHECK: define dso_local spir_func void @_Z7geemaskv({{.*}} %[[RES:[a-zA-Z0-9_\.]+]]){{.*}} { simd_mask<2> val(1); return val; -// CHECK: %[[MDATA:[a-zA-Z0-9_\.]+]] = getelementptr inbounds {{.*}} %[[RES]], i64 0, i32 0, i32 0 -// CHECK-NEXT: store <2 x i16> , <2 x i16> addrspace(4)* %[[MDATA]] +// CHECK: store <2 x i16> , ptr addrspace(4) %[[RES]] // CHECK-NEXT: ret void // CHECK-NEXT: } } @@ -95,8 +90,7 @@ SYCL_EXTERNAL auto geehalf() SYCL_ESIMD_FUNCTION { // CHECK: define dso_local spir_func void @_Z7geehalfv({{.*}} %[[RES:[a-zA-Z0-9_\.]+]]){{.*}} { simd val(-7); return val; -// CHECK: %[[MDATA:[a-zA-Z0-9_\.]+]] = getelementptr inbounds {{.*}} %[[RES]], i64 0, i32 0, i32 0 -// CHECK-NEXT: store <2 x half> , <2 x half> addrspace(4)* %[[MDATA]] +// CHECK: store <2 x half> , ptr addrspace(4) %[[RES]] // CHECK-NEXT: ret void // CHECK-NEXT: } } diff --git a/sycl/test/esimd/ctor_codegen_opaque.cpp b/sycl/test/esimd/ctor_codegen_opaque.cpp deleted file mode 100644 index d593e3fad2117..0000000000000 --- a/sycl/test/esimd/ctor_codegen_opaque.cpp +++ /dev/null @@ -1,96 +0,0 @@ -// RUN: %clangxx -Xclang -opaque-pointers -fsycl -fsycl-device-only -S %s -o - | FileCheck %s - -// Check efficiency of LLVM IR generated for various simd constructors. - -#include -#include - -using namespace sycl; -using namespace sycl::ext::intel::esimd; - -// clang-format off - -// Array-based constructor, FP element type, no loops exected - check. -SYCL_EXTERNAL auto foo(double i) SYCL_ESIMD_FUNCTION { -// CHECK: define dso_local spir_func void @_Z3food( -// CHECK: {{[^,]*}} %[[RES:[a-zA-Z0-9_\.]+]], -// CHECK: {{[^,]*}} %[[I:[a-zA-Z0-9_\.]+]]){{.*}} { - simd val({ i, i }); - return val; -// CHECK: %[[V0:[a-zA-Z0-9_\.]+]] = insertelement <2 x double> undef, double %[[I]], i64 0 -// CHECK-NEXT: %[[V1:[a-zA-Z0-9_\.]+]] = shufflevector <2 x double> %[[V0]], <2 x double> poison, <2 x i32> zeroinitializer -// CHECK-NEXT: store <2 x double> %[[V1]], ptr addrspace(4) %[[RES]] -// CHECK-NEXT: ret void -// CHECK-NEXT: } -} - -// Base + step constructor, FP element type, loops exected - don't check. -SYCL_EXTERNAL auto bar() SYCL_ESIMD_FUNCTION { - simd val(17, 3); - return val; -} - -// Base + step constructor, integer element type, no loops exected - check. -SYCL_EXTERNAL auto baz() SYCL_ESIMD_FUNCTION { - // CHECK: define dso_local spir_func void @_Z3bazv({{.*}} %[[RES:[a-zA-Z0-9_\.]+]]){{.*}} { - simd val(17, 3); - return val; - // CHECK: store <2 x i32> , ptr addrspace(4) %[[RES]] - // CHECK-NEXT: ret void - // CHECK-NEXT: } -} - -// Broadcast constructor, FP element type, no loops exected - check. -SYCL_EXTERNAL auto gee() SYCL_ESIMD_FUNCTION { -// CHECK: define dso_local spir_func void @_Z3geev({{.*}} %[[RES:[a-zA-Z0-9_\.]+]]){{.*}} { - simd val(-7); - return val; -// CHECK: store <2 x float> , ptr addrspace(4) %[[RES]] -// CHECK-NEXT: ret void -// CHECK-NEXT: } -} - -// Array-based simd_mask constructor, no loops exected - check. -SYCL_EXTERNAL auto foomask() SYCL_ESIMD_FUNCTION { -// CHECK: define dso_local spir_func void @_Z7foomaskv({{.*}} %[[RES:[a-zA-Z0-9_\.]+]]){{.*}} { - simd_mask<2> val({ 1, 0 }); - return val; -// CHECK: store <2 x i16> , ptr addrspace(4) %[[RES]] -// CHECK-NEXT: ret void -// CHECK-NEXT: } -} - -// Broadcast simd_mask constructor, no loops exected - check. -SYCL_EXTERNAL auto geemask() SYCL_ESIMD_FUNCTION { -// CHECK: define dso_local spir_func void @_Z7geemaskv({{.*}} %[[RES:[a-zA-Z0-9_\.]+]]){{.*}} { - simd_mask<2> val(1); - return val; -// CHECK: store <2 x i16> , ptr addrspace(4) %[[RES]] -// CHECK-NEXT: ret void -// CHECK-NEXT: } -} - -// The element type is 'half', which requires conversion, so code generation -// is less efficient - has loop over elements. No much reason to check. -SYCL_EXTERNAL auto foohalf(half i) SYCL_ESIMD_FUNCTION { - simd val({ i, i }); - return val; -} - -// The element type is 'half', which requires conversion, so code generation -// is less efficient - has loop over elements. No much reason to check. -SYCL_EXTERNAL auto barhalf() SYCL_ESIMD_FUNCTION { - simd val(17, 3); - return val; -} - -// Here the element is half too, but code generation is efficient because -// no per-element operations are needed - scalar is converted before broadcasting. -SYCL_EXTERNAL auto geehalf() SYCL_ESIMD_FUNCTION { -// CHECK: define dso_local spir_func void @_Z7geehalfv({{.*}} %[[RES:[a-zA-Z0-9_\.]+]]){{.*}} { - simd val(-7); - return val; -// CHECK: store <2 x half> , ptr addrspace(4) %[[RES]] -// CHECK-NEXT: ret void -// CHECK-NEXT: } -} diff --git a/sycl/test/esimd/intrins_trans.cpp b/sycl/test/esimd/intrins_trans.cpp index 469154773f586..b761aec084645 100644 --- a/sycl/test/esimd/intrins_trans.cpp +++ b/sycl/test/esimd/intrins_trans.cpp @@ -1,8 +1,12 @@ -// RUN: %clangxx -O0 -fsycl -fno-sycl-esimd-force-stateless-mem -fsycl-device-only -Xclang -emit-llvm -Xclang -no-opaque-pointers %s -o %t +// RUN: %clangxx -O0 -fsycl -fsycl-device-only -fno-sycl-esimd-force-stateless-mem -Xclang -emit-llvm %s -o %t // RUN: sycl-post-link -split-esimd -lower-esimd -O0 -S %t -o %t.table -// RUN: FileCheck %s -input-file=%t_esimd_0.ll +// RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATEFUL -// Checks ESIMD intrinsic translation. +// RUN: %clangxx -O0 -fsycl -fsycl-device-only -fsycl-esimd-force-stateless-mem -Xclang -emit-llvm %s -o %t +// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem -O0 -S %t -o %t.table +// RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATELESS + +// Checks ESIMD intrinsic translation with opaque pointers. // NOTE: must be run in -O0, as optimizer optimizes away some of the code #include @@ -32,149 +36,6 @@ void bar() { kernel(esimdf); } -SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd foo() { - // CHECK-LABEL: @_Z3foov - constexpr int VL = 32; - uint32_t *ptr = 0; - using VecT = typename simd::raw_vector_type; - VecT *vec_ptr = 0; - - int x = 0, y = 0, z = 0; - - simd v1(0, x + z); - simd offsets(0, y); - simd v_addr(reinterpret_cast(ptr)); - simd_mask pred; - v_addr += offsets; - - __esimd_svm_atomic0(v_addr.data(), pred.data()); - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.inc.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef) - - __esimd_svm_atomic1(v_addr.data(), v1.data(), - pred.data()); - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.add.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef) - __esimd_svm_atomic2( - v_addr.data(), v1.data(), v1.data(), pred.data()); - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.cmpxchg.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef) - - simd v00 = __esimd_svm_block_ld(vec_ptr); - // CHECK: %[[VAR1:[0-9a-zA-Z_.]+]] = load <32 x i32>, <32 x i32> addrspace(4)* %{{[a-zA-Z0-9.]+}}, align 4 - __esimd_svm_block_st(vec_ptr, v00.data()); - // CHECK-NEXT: store <32 x i32> %[[VAR1]], <32 x i32> addrspace(4)* %{{[a-zA-Z0-9.]+}}, align 128 - - simd v01 = - __esimd_svm_gather(v_addr.data(), pred.data()); - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.gather.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef) - - __esimd_svm_scatter(v_addr.data(), v01.data(), pred.data()); - // CHECK: call void @llvm.genx.svm.scatter.v32i1.v32i64.v32i32(<32 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}) - - simd mina(0, 1); - simd minc(5); - minc = __esimd_smin(mina.data(), minc.data()); - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <16 x i16> @llvm.genx.smin.v16i16.v16i16(<16 x i16> %{{[0-9a-zA-Z_.]+}}, <16 x i16> %{{[0-9a-zA-Z_.]+}}) - - simd diva(2.f); - simd divb(1.f); - diva = __esimd_ieee_div(diva.data(), divb.data()); - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <1 x float> @llvm.genx.ieee.div.v1f32(<1 x float> %{{[0-9a-zA-Z_.]+}}, <1 x float> %{{[0-9a-zA-Z_.]+}}) - - simd a(0.1f); - simd b = __esimd_rdregion(a.data(), 0); - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <8 x float> @llvm.genx.rdregionf.v8f32.v16f32.i16(<16 x float> %{{[0-9a-zA-Z_.]+}}, i32 0, i32 8, i32 1, i16 0, i32 0) - - simd c(0.0f); - - using PH = sycl::access::placeholder; - - sycl::accessor, 2, - sycl::access::mode::read, sycl::access::target::image, - PH::false_t> - pA; - sycl::accessor, 2, - sycl::access::mode::write, sycl::access::target::image, - PH::false_t> - pB; - - auto d = __esimd_wrregion( - c.data() /*dst*/, b.data() /*src*/, 0 /*offset*/); - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <16 x float> @llvm.genx.wrregionf.v16f32.v8f32.i16.v8i1(<16 x float> %{{[0-9a-zA-Z_.]+}}, <8 x float> %{{[0-9a-zA-Z_.]+}}, i32 0, i32 8, i32 1, i16 0, i32 0, <8 x i1> ) - - simd va; - va = media_block_load(pA, x, y); - // CHECK: %[[SI0_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(%opencl.image2d_ro_t addrspace(1)* %{{[0-9a-zA-Z_.]+}}) - // CHECK: store i32 %[[SI0_VAL]], i32 addrspace(4)* %[[SI0_ADDR:[0-9a-zA-Z_.]+]] - // CHECK: %[[SI0:[0-9a-zA-Z_.]+]] = load i32, i32 addrspace(4)* %[[SI0_ADDR]] - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.media.ld.v32i32(i32 0, i32 %[[SI0]], i32 0, i32 32, i32 %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}) - - simd vb = va + 1; - media_block_store(pB, x, y, vb); - // CHECK: %[[SI2_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(%opencl.image2d_wo_t addrspace(1)* %{{[0-9a-zA-Z_.]+}}) - // CHECK: store i32 %[[SI2_VAL]], i32 addrspace(4)* %[[SI2_ADDR:[0-9a-zA-Z_.]+]] - // CHECK: %[[SI2:[0-9a-zA-Z_.]+]] = load i32, i32 addrspace(4)* %[[SI2_ADDR]] - // CHECK: call void @llvm.genx.media.st.v32i32(i32 0, i32 %[[SI2]], i32 0, i32 32, i32 %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}) - - auto ee = __esimd_vload((detail::vector_type_t *)(&vg)); - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <16 x i32> @llvm.genx.vload.v16i32.p0v16i32(<16 x i32>* {{.*}}) - __esimd_vstore(&vc, va.data()); - // CHECK: store <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> addrspace(4)* {{.*}} - - { - sycl::accessor - acc; - simd offsets = 1; - simd_mask<8> pred({1, 0, 1, 0, 1, 0, 1, 0}); - - // 4-byte element gather - simd v = gather(acc, offsets, 100); - // CHECK: %[[SI3_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(i32 addrspace(1)* noundef %{{[0-9a-zA-Z_.]+}}) - // CHECK: store i32 %[[SI3_VAL]], i32 addrspace(4)* %[[SI3_ADDR:[0-9a-zA-Z_.]+]] - // CHECK: %[[SI3:[0-9a-zA-Z_.]+]] = load i32, i32 addrspace(4)* %[[SI3_ADDR]] - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 2, i16 0, i32 %[[SI3]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i1> %{{[0-9a-zA-Z_.]+}}) - - // 4-byte element scatter - scatter(acc, offsets, v, 100, pred); - // CHECK: %[[SI4_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(i32 addrspace(1)* noundef %{{[0-9a-zA-Z_.]+}}) - // CHECK: store i32 %[[SI4_VAL]], i32 addrspace(4)* %[[SI4_ADDR:[0-9a-zA-Z_.]+]] - // CHECK: %[[SI4:[0-9a-zA-Z_.]+]] = load i32, i32 addrspace(4)* %[[SI4_ADDR]] - // CHECK: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 2, i16 0, i32 %[[SI4]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}) - - // 1-byte element gather - simd v1 = gather(acc, offsets, 100); - // CHECK: %[[SI5_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(i32 addrspace(1)* noundef %{{[0-9a-zA-Z_.]+}}) - // CHECK: store i32 %[[SI5_VAL]], i32 addrspace(4)* %[[SI5_ADDR:[0-9a-zA-Z_.]+]] - // CHECK: %[[SI5:[0-9a-zA-Z_.]+]] = load i32, i32 addrspace(4)* %[[SI5_ADDR]] - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 0, i16 0, i32 %[[SI5]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i1> %{{[0-9a-zA-Z_.]+}}) - - // 1-byte element scatter - scatter(acc, offsets, v1, 100, pred); - // CHECK: %[[SI6_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(i32 addrspace(1)* noundef %{{[0-9a-zA-Z_.]+}}) - // CHECK: store i32 %[[SI6_VAL]], i32 addrspace(4)* %[[SI6_ADDR:[0-9a-zA-Z_.]+]] - // CHECK: %[[SI6:[0-9a-zA-Z_.]+]] = load i32, i32 addrspace(4)* %[[SI6_ADDR]] - // CHECK: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, i16 0, i32 %[[SI6]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}) - } - __esimd_fence(fence_mask::global_coherent_fence); - // CHECK: call void @llvm.genx.fence(i8 1) - __esimd_fence(fence_mask::l3_flush_instructions); - // CHECK: call void @llvm.genx.fence(i8 2) - __esimd_fence(fence_mask::l3_flush_texture_data); - // CHECK: call void @llvm.genx.fence(i8 4) - __esimd_fence(fence_mask::l3_flush_constant_data); - // CHECK: call void @llvm.genx.fence(i8 8) - __esimd_fence(fence_mask::l3_flush_rw_data); - // CHECK: call void @llvm.genx.fence(i8 16) - __esimd_fence(fence_mask::local_barrier); - // CHECK: call void @llvm.genx.fence(i8 32) - __esimd_fence(fence_mask::l1_flush_ro_data); - // CHECK: call void @llvm.genx.fence(i8 64) - __esimd_fence(fence_mask::sw_barrier); - // CHECK: call void @llvm.genx.fence(i8 -128) - - return d; -} - // TODO // 1. __esimd* intrinsic translation tests from // llvm\test\SYCLLowerIR\esimd_lower_intrins.ll should be refactored and @@ -220,21 +81,34 @@ test_mem_intrins(int *addr, const vec &xf, using VecT = typename simd::raw_vector_type; VecT *vec_addr = reinterpret_cast(addr); vec x = __esimd_svm_block_ld(vec_addr); - // CHECK-LABEL: load <8 x i32>, <8 x i32> addrspace(4)* %{{[a-zA-Z0-9.]+}}, align 4 + // CHECK-LABEL: load <8 x i32>, ptr addrspace(4) %{{[a-zA-Z0-9.]+}}, align 4 use(x); } { using VecT = typename simd::raw_vector_type; VecT *vec_addr = reinterpret_cast(addr); vec x = __esimd_svm_block_ld(vec_addr); - // CHECK-LABEL: load <8 x i32>, <8 x i32> addrspace(4)* %{{[a-zA-Z0-9.]+}}, align 32 + // CHECK-LABEL: load <8 x i32>, ptr addrspace(4) %{{[a-zA-Z0-9.]+}}, align 32 use(x); } { using VecT = typename simd::raw_vector_type; VecT *vec_addr = reinterpret_cast(addr); __esimd_svm_block_st(vec_addr, get8i()); - // CHECK-LABEL: store <8 x i32> %{{[a-zA-Z0-9.]+}}, <8 x i32> addrspace(4)* %{{[a-zA-Z0-9.]+}}, align 32 + // CHECK-LABEL: store <8 x i32> %{{[a-zA-Z0-9.]+}}, ptr addrspace(4) %{{[a-zA-Z0-9.]+}}, align 32 + } + { + uint32_t offset = 128; + vec x = __esimd_slm_block_ld(offset); + // CHECK: %[[VAR_OFF1:[0-9a-zA-Z_.]+]] = inttoptr i32 %{{[a-zA-Z0-9.]+}} to ptr addrspace(3) + // CHECK-NEXT: load <8 x i32>, ptr addrspace(3) %[[VAR_OFF1]], align 32 + use(x); + } + { + uint32_t offset = 256; + __esimd_slm_block_st(offset, get8i()); + // CHECK: %[[VAR_OFF2:[0-9a-zA-Z_.]+]] = inttoptr i32 %{{[a-zA-Z0-9.]+}} to ptr addrspace(3) + // CHECK-NEXT: store <8 x i32> %{{[a-zA-Z0-9.]+}}, ptr addrspace(3) %[[VAR_OFF2]], align 4 } { auto x = __esimd_svm_gather(get8ui64(), get8ui16()); @@ -318,3 +192,150 @@ SYCL_EXTERNAL void test_math_intrins() SYCL_ESIMD_FUNCTION { use(res); } } + +SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd foo() { + // CHECK-LABEL: @_Z3foov + constexpr int VL = 32; + uint32_t *ptr = 0; + using VecT = typename simd::raw_vector_type; + VecT *vec_ptr = 0; + + int x = 0, y = 0, z = 0; + + simd v1(0, x + z); + simd offsets(0, y); + simd v_addr(reinterpret_cast(ptr)); + simd_mask pred; + v_addr += offsets; + + __esimd_svm_atomic0(v_addr.data(), pred.data()); + // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.inc.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef) + + __esimd_svm_atomic1(v_addr.data(), v1.data(), + pred.data()); + // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.add.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef) + __esimd_svm_atomic2( + v_addr.data(), v1.data(), v1.data(), pred.data()); + // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.cmpxchg.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef) + + simd v00 = __esimd_svm_block_ld(vec_ptr); + // CHECK: %[[VAR1:[0-9a-zA-Z_.]+]] = load <32 x i32>, ptr addrspace(4) %{{[a-zA-Z0-9.]+}}, align 4 + __esimd_svm_block_st(vec_ptr, v00.data()); + // CHECK-NEXT: store <32 x i32> %[[VAR1]], ptr addrspace(4) %{{[a-zA-Z0-9.]+}}, align 128 + + simd v01 = + __esimd_svm_gather(v_addr.data(), pred.data()); + // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.gather.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef) + + __esimd_svm_scatter(v_addr.data(), v01.data(), pred.data()); + // CHECK: call void @llvm.genx.svm.scatter.v32i1.v32i64.v32i32(<32 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}) + + simd mina(0, 1); + simd minc(5); + minc = __esimd_smin(mina.data(), minc.data()); + // CHECK: %{{[0-9a-zA-Z_.]+}} = call <16 x i16> @llvm.genx.smin.v16i16.v16i16(<16 x i16> %{{[0-9a-zA-Z_.]+}}, <16 x i16> %{{[0-9a-zA-Z_.]+}}) + + simd diva(2.f); + simd divb(1.f); + diva = __esimd_ieee_div(diva.data(), divb.data()); + // CHECK: %{{[0-9a-zA-Z_.]+}} = call <1 x float> @llvm.genx.ieee.div.v1f32(<1 x float> %{{[0-9a-zA-Z_.]+}}, <1 x float> %{{[0-9a-zA-Z_.]+}}) + + simd a(0.1f); + simd b = __esimd_rdregion(a.data(), 0); + // CHECK: %{{[0-9a-zA-Z_.]+}} = call <8 x float> @llvm.genx.rdregionf.v8f32.v16f32.i16(<16 x float> %{{[0-9a-zA-Z_.]+}}, i32 0, i32 8, i32 1, i16 0, i32 0) + + simd c(0.0f); + + using PH = sycl::access::placeholder; + + sycl::accessor, 2, + sycl::access::mode::read, sycl::access::target::image, + PH::false_t> + pA; + sycl::accessor, 2, + sycl::access::mode::write, sycl::access::target::image, + PH::false_t> + pB; + + auto d = __esimd_wrregion( + c.data() /*dst*/, b.data() /*src*/, 0 /*offset*/); + // CHECK: %{{[0-9a-zA-Z_.]+}} = call <16 x float> @llvm.genx.wrregionf.v16f32.v8f32.i16.v8i1(<16 x float> %{{[0-9a-zA-Z_.]+}}, <8 x float> %{{[0-9a-zA-Z_.]+}}, i32 0, i32 8, i32 1, i16 0, i32 0, <8 x i1> ) + + simd va; + va = media_block_load(pA, x, y); + // CHECK: %[[SI0_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %{{[0-9a-zA-Z_.]+}}) + // CHECK: store i32 %[[SI0_VAL]], ptr addrspace(4) %[[SI0_ADDR:[0-9a-zA-Z_.]+]] + // CHECK: %[[SI0:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI0_ADDR]] + // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.media.ld.v32i32(i32 0, i32 %[[SI0]], i32 0, i32 32, i32 %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}) + + simd vb = va + 1; + media_block_store(pB, x, y, vb); + // CHECK: %[[SI2_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %{{[0-9a-zA-Z_.]+}}) + // CHECK: store i32 %[[SI2_VAL]], ptr addrspace(4) %[[SI2_ADDR:[0-9a-zA-Z_.]+]] + // CHECK: %[[SI2:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI2_ADDR]] + // CHECK: call void @llvm.genx.media.st.v32i32(i32 0, i32 %[[SI2]], i32 0, i32 32, i32 %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}) + + auto ee = __esimd_vload((detail::vector_type_t *)(&vg)); + // CHECK: %{{[0-9a-zA-Z_.]+}} = call <16 x i32> @llvm.genx.vload.v16i32.p0(ptr {{.*}}) + __esimd_vstore(&vc, va.data()); + // CHECK: store <32 x i32> %{{[0-9a-zA-Z_.]+}}, ptr addrspace(4) {{.*}} + + { + sycl::accessor + acc; + simd offsets = 1; + simd_mask<8> pred({1, 0, 1, 0, 1, 0, 1, 0}); + + // 4-byte element gather + simd v = gather(acc, offsets, 100); + // CHECK-STATEFUL: %[[SI3_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}}) + // CHECK-STATEFUL: store i32 %[[SI3_VAL]], ptr addrspace(4) %[[SI3_ADDR:[0-9a-zA-Z_.]+]] + // CHECK-STATEFUL: %[[SI3:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI3_ADDR]] + // CHECK-STATEFUL: call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 2, i16 0, i32 %[[SI3]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i1> %{{[0-9a-zA-Z_.]+}}) + // CHECK-STATELESS: call <8 x i32> @llvm.genx.svm.gather.v8i32.v8i1.v8i64(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <8 x i32> undef) + + // 4-byte element scatter + scatter(acc, offsets, v, 100, pred); + // CHECK-STATEFUL: %[[SI4_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}}) + // CHECK-STATEFUL: store i32 %[[SI4_VAL]], ptr addrspace(4) %[[SI4_ADDR:[0-9a-zA-Z_.]+]] + // CHECK-STATEFUL: %[[SI4:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI4_ADDR]] + // CHECK-STATEFUL: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 2, i16 0, i32 %[[SI4]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}) + // CHECK-STATELESS: call void @llvm.genx.svm.scatter.v8i1.v8i64.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}) + + // 1-byte element gather + simd v1 = gather(acc, offsets, 100); + // CHECK-STATEFUL: %[[SI5_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}}) + // CHECK-STATEFUL: store i32 %[[SI5_VAL]], ptr addrspace(4) %[[SI5_ADDR:[0-9a-zA-Z_.]+]] + // CHECK-STATEFUL: %[[SI5:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI5_ADDR]] + // CHECK-STATEFUL: call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 0, i16 0, i32 %[[SI5]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i1> %{{[0-9a-zA-Z_.]+}}) + // CHECK-STATELESS: call <32 x i8> @llvm.genx.svm.gather.v32i8.v8i1.v8i64(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i8> undef) + + // 1-byte element scatter + scatter(acc, offsets, v1, 100, pred); + // CHECK-STATEFUL: %[[SI6_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}}) + // CHECK-STATEFUL: store i32 %[[SI6_VAL]], ptr addrspace(4) %[[SI6_ADDR:[0-9a-zA-Z_.]+]] + // CHECK-STATEFUL: %[[SI6:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI6_ADDR]] + // CHECK-STATEFUL: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, i16 0, i32 %[[SI6]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}) + // CHECK-STATELESS: call void @llvm.genx.svm.scatter.v8i1.v8i64.v32i8(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i8> %{{[0-9a-zA-Z_.]+}}) + } + __esimd_fence(fence_mask::global_coherent_fence); + // CHECK: call void @llvm.genx.fence(i8 1) + __esimd_fence(fence_mask::l3_flush_instructions); + // CHECK: call void @llvm.genx.fence(i8 2) + __esimd_fence(fence_mask::l3_flush_texture_data); + // CHECK: call void @llvm.genx.fence(i8 4) + __esimd_fence(fence_mask::l3_flush_constant_data); + // CHECK: call void @llvm.genx.fence(i8 8) + __esimd_fence(fence_mask::l3_flush_rw_data); + // CHECK: call void @llvm.genx.fence(i8 16) + __esimd_fence(fence_mask::local_barrier); + // CHECK: call void @llvm.genx.fence(i8 32) + __esimd_fence(fence_mask::l1_flush_ro_data); + // CHECK: call void @llvm.genx.fence(i8 64) + __esimd_fence(fence_mask::sw_barrier); + // CHECK: call void @llvm.genx.fence(i8 -128) + + return d; +} diff --git a/sycl/test/esimd/intrins_trans_opaque.cpp b/sycl/test/esimd/intrins_trans_opaque.cpp deleted file mode 100644 index 76abec066417c..0000000000000 --- a/sycl/test/esimd/intrins_trans_opaque.cpp +++ /dev/null @@ -1,341 +0,0 @@ -// RUN: %clangxx -O0 -fsycl -fsycl-device-only -fno-sycl-esimd-force-stateless-mem -Xclang -emit-llvm -Xclang -opaque-pointers %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -O0 -S %t -o %t.table -// RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATEFUL - -// RUN: %clangxx -O0 -fsycl -fsycl-device-only -fsycl-esimd-force-stateless-mem -Xclang -emit-llvm -Xclang -opaque-pointers %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem -O0 -S %t -o %t.table -// RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATELESS - -// Checks ESIMD intrinsic translation with opaque pointers. -// NOTE: must be run in -O0, as optimizer optimizes away some of the code - -#include -#include -#include - -using namespace sycl::ext::intel::esimd; - -ESIMD_PRIVATE -detail::vector_type_t vc; -ESIMD_PRIVATE ESIMD_REGISTER(192) simd vg; - -SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd foo(); - -class EsimdFunctor { -public: - void operator()() __attribute__((sycl_explicit_simd)) { foo(); } -}; - -template -__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { - kernelFunc(); -} - -void bar() { - EsimdFunctor esimdf; - kernel(esimdf); -} - -// TODO -// 1. __esimd* intrinsic translation tests from -// llvm\test\SYCLLowerIR\esimd_lower_intrins.ll should be refactored and -// moved here, as the form below is much easier to maintain with the same -// level of testing strength -// 2. Test cases above should be refactored not to use user-level APIs like -// gather and use __esimd* calls instead. -template using vec = typename simd::raw_vector_type; - -template using mask = typename simd_mask::raw_vector_type; - -SYCL_EXTERNAL void use(const vec &x) SYCL_ESIMD_FUNCTION; -SYCL_EXTERNAL void use(const vec &x) SYCL_ESIMD_FUNCTION; -SYCL_EXTERNAL void use(const vec &x) SYCL_ESIMD_FUNCTION; - -SYCL_EXTERNAL vec get8f() SYCL_ESIMD_FUNCTION; -SYCL_EXTERNAL vec get8i() SYCL_ESIMD_FUNCTION; -SYCL_EXTERNAL vec get8ui64() SYCL_ESIMD_FUNCTION; -SYCL_EXTERNAL vec get8ui16() SYCL_ESIMD_FUNCTION; -SYCL_EXTERNAL vec get8ui8() SYCL_ESIMD_FUNCTION; - -SYCL_EXTERNAL void -test_mem_intrins(int *addr, const vec &xf, - const vec &xi) SYCL_ESIMD_FUNCTION { - { - constexpr SurfaceIndex si = 0; - vec x = __esimd_oword_ld_unaligned(si, 0); - // CHECK-LABEL: %{{[a-zA-Z0-9.]+}} = call <8 x float> @llvm.genx.oword.ld.unaligned.v8f32(i32 0, i32 0, i32 0) - use(x); - } - { - constexpr SurfaceIndex si = 0; - vec x = __esimd_oword_ld(si, 0); - // CHECK-LABEL: %{{[a-zA-Z0-9.]+}} = call <8 x float> @llvm.genx.oword.ld.v8f32(i32 0, i32 0, i32 0) - use(x); - } - { - constexpr SurfaceIndex si = 0; - __esimd_oword_st(si, 0, get8f()); - // CHECK-LABEL: call void @llvm.genx.oword.st.v8f32(i32 0, i32 0, <8 x float> %{{[a-zA-Z0-9.]+}}) - } - { - using VecT = typename simd::raw_vector_type; - VecT *vec_addr = reinterpret_cast(addr); - vec x = __esimd_svm_block_ld(vec_addr); - // CHECK-LABEL: load <8 x i32>, ptr addrspace(4) %{{[a-zA-Z0-9.]+}}, align 4 - use(x); - } - { - using VecT = typename simd::raw_vector_type; - VecT *vec_addr = reinterpret_cast(addr); - vec x = __esimd_svm_block_ld(vec_addr); - // CHECK-LABEL: load <8 x i32>, ptr addrspace(4) %{{[a-zA-Z0-9.]+}}, align 32 - use(x); - } - { - using VecT = typename simd::raw_vector_type; - VecT *vec_addr = reinterpret_cast(addr); - __esimd_svm_block_st(vec_addr, get8i()); - // CHECK-LABEL: store <8 x i32> %{{[a-zA-Z0-9.]+}}, ptr addrspace(4) %{{[a-zA-Z0-9.]+}}, align 32 - } - { - uint32_t offset = 128; - vec x = __esimd_slm_block_ld(offset); - // CHECK: %[[VAR_OFF1:[0-9a-zA-Z_.]+]] = inttoptr i32 %{{[a-zA-Z0-9.]+}} to ptr addrspace(3) - // CHECK-NEXT: load <8 x i32>, ptr addrspace(3) %[[VAR_OFF1]], align 32 - use(x); - } - { - uint32_t offset = 256; - __esimd_slm_block_st(offset, get8i()); - // CHECK: %[[VAR_OFF2:[0-9a-zA-Z_.]+]] = inttoptr i32 %{{[a-zA-Z0-9.]+}} to ptr addrspace(3) - // CHECK-NEXT: store <8 x i32> %{{[a-zA-Z0-9.]+}}, ptr addrspace(3) %[[VAR_OFF2]], align 4 - } - { - auto x = __esimd_svm_gather(get8ui64(), get8ui16()); - // CHECK-LABEL: %{{[a-zA-Z0-9.]+}} = call <8 x i8> @llvm.genx.svm.gather.v8i8.v8i1.v8i64(<8 x i1> %{{[a-zA-Z0-9.]+}}, i32 0, <8 x i64> %{{[a-zA-Z0-9.]+}}, <8 x i8> undef) - use(x); - } - { - __esimd_svm_scatter(get8ui64(), get8ui8(), get8ui16()); - // CHECK-LABEL: call void @llvm.genx.svm.scatter.v8i1.v8i64.v8i8(<8 x i1> %{{[a-zA-Z0-9.]+}}, i32 0, <8 x i64> %{{[a-zA-Z0-9.]+}}, <8 x i8> %{{[a-zA-Z0-9.]+}}) - } - { - auto x = - __esimd_svm_atomic0(get8ui64(), get8ui16()); - // CHECK-LABEL: %{{[a-zA-Z0-9.]+}} = call <8 x i32> @llvm.genx.svm.atomic.inc.v8i32.v8i1.v8i64(<8 x i1> %{{[a-zA-Z0-9.]+}}, <8 x i64> %{{[a-zA-Z0-9.]+}}, <8 x i32> undef) - use(x); - } - { - vec src0 = get8f(); - auto x = __esimd_svm_atomic1(get8ui64(), src0, - get8ui16()); - // CHECK-LABEL: %{{[a-zA-Z0-9.]+}} = call <8 x float> @llvm.genx.svm.atomic.fmin.v8f32.v8i1.v8i64(<8 x i1> %{{[a-zA-Z0-9.]+}}, <8 x i64> %{{[a-zA-Z0-9.]+}}, <8 x float> %{{[a-zA-Z0-9.]+}}, <8 x float> undef) - use(x); - } - { - vec src0 = get8f(); - vec src1 = get8f(); - auto x = __esimd_svm_atomic2(get8ui64(), src0, - src1, get8ui16()); - // CHECK-LABEL: %{{[a-zA-Z0-9.]+}} = call <8 x float> @llvm.genx.svm.atomic.fcmpwr.v8f32.v8i1.v8i64(<8 x i1> %{{[a-zA-Z0-9.]+}}, <8 x i64> %{{[a-zA-Z0-9.]+}}, <8 x float> %{{[a-zA-Z0-9.]+}}, <8 x float> %{{[a-zA-Z0-9.]+}}, <8 x float> undef) - use(x); - } - { - vec src0 = get8i(); - auto x = __esimd_svm_atomic1(get8ui64(), src0, - get8ui16()); - // CHECK-LABEL: %{{[a-zA-Z0-9.]+}} = call <8 x i32> @llvm.genx.svm.atomic.imin.v8i32.v8i1.v8i64(<8 x i1> %{{[a-zA-Z0-9.]+}}, <8 x i64> %{{[a-zA-Z0-9.]+}}, <8 x i32> %{{[a-zA-Z0-9.]+}}, <8 x i32> undef) - use(x); - } - { - vec src0 = get8i(); - auto x = __esimd_svm_atomic1(get8ui64(), src0, - get8ui16()); - // CHECK-LABEL: %{{[a-zA-Z0-9.]+}} = call <8 x i32> @llvm.genx.svm.atomic.imax.v8i32.v8i1.v8i64(<8 x i1> %{{[a-zA-Z0-9.]+}}, <8 x i64> %{{[a-zA-Z0-9.]+}}, <8 x i32> %{{[a-zA-Z0-9.]+}}, <8 x i32> undef) - use(x); - } - { - constexpr SurfaceIndex si = 0; - vec x = - __esimd_media_ld(si, 0, 0); - // CHECK-LABEL: %{{[a-zA-Z0-9.]+}} = call <8 x float> @llvm.genx.media.ld.v8f32(i32 0, i32 0, i32 0, i32 4, i32 0, i32 0) - use(x); - } - { - constexpr SurfaceIndex si = 0; - vec x = get8f(); - __esimd_media_st(si, 0, 0, x); - // CHECK-LABEL: call void @llvm.genx.media.st.v8f32(i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, <8 x float> %{{[a-zA-Z0-9.]+}}) - } -} - -SYCL_EXTERNAL void test_math_intrins() SYCL_ESIMD_FUNCTION { - { - vec x0 = get8f(); - vec x1 = get8f(); - auto y = __esimd_ieee_div(x0, x1); - // CHECK-LABEL: %{{[a-zA-Z0-9.]+}} = call <8 x float> @llvm.genx.ieee.div.v8f32(<8 x float> %{{[a-zA-Z0-9.]+}}, <8 x float> %{{[a-zA-Z0-9.]+}}) - use(y); - } - { - vec x = get8f(); - auto y = __esimd_ieee_sqrt(x); - // CHECK-LABEL: %{{[a-zA-Z0-9.]+}} = call <8 x float> @llvm.genx.ieee.sqrt.v8f32(<8 x float> %{{[a-zA-Z0-9.]+}}) - use(y); - } - { - vec x0 = get8i(); - vec x1 = get8i(); - vec x2 = get8i(); - auto res = __esimd_bfn<0xff, int, 8>(x0, x1, x2); - // CHECK-LABEL: %{{[a-zA-Z0-9.]+}} = call <8 x i32> @llvm.genx.bfn.v8i32.v8i32(<8 x i32> %{{[a-zA-Z0-9.]+}}, <8 x i32> %{{[a-zA-Z0-9.]+}}, <8 x i32> %{{[a-zA-Z0-9.]+}}, i8 -1) - use(res); - } -} - -SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd foo() { - // CHECK-LABEL: @_Z3foov - constexpr int VL = 32; - uint32_t *ptr = 0; - using VecT = typename simd::raw_vector_type; - VecT *vec_ptr = 0; - - int x = 0, y = 0, z = 0; - - simd v1(0, x + z); - simd offsets(0, y); - simd v_addr(reinterpret_cast(ptr)); - simd_mask pred; - v_addr += offsets; - - __esimd_svm_atomic0(v_addr.data(), pred.data()); - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.inc.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef) - - __esimd_svm_atomic1(v_addr.data(), v1.data(), - pred.data()); - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.add.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef) - __esimd_svm_atomic2( - v_addr.data(), v1.data(), v1.data(), pred.data()); - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.cmpxchg.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef) - - simd v00 = __esimd_svm_block_ld(vec_ptr); - // CHECK: %[[VAR1:[0-9a-zA-Z_.]+]] = load <32 x i32>, ptr addrspace(4) %{{[a-zA-Z0-9.]+}}, align 4 - __esimd_svm_block_st(vec_ptr, v00.data()); - // CHECK-NEXT: store <32 x i32> %[[VAR1]], ptr addrspace(4) %{{[a-zA-Z0-9.]+}}, align 128 - - simd v01 = - __esimd_svm_gather(v_addr.data(), pred.data()); - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.gather.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef) - - __esimd_svm_scatter(v_addr.data(), v01.data(), pred.data()); - // CHECK: call void @llvm.genx.svm.scatter.v32i1.v32i64.v32i32(<32 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}) - - simd mina(0, 1); - simd minc(5); - minc = __esimd_smin(mina.data(), minc.data()); - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <16 x i16> @llvm.genx.smin.v16i16.v16i16(<16 x i16> %{{[0-9a-zA-Z_.]+}}, <16 x i16> %{{[0-9a-zA-Z_.]+}}) - - simd diva(2.f); - simd divb(1.f); - diva = __esimd_ieee_div(diva.data(), divb.data()); - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <1 x float> @llvm.genx.ieee.div.v1f32(<1 x float> %{{[0-9a-zA-Z_.]+}}, <1 x float> %{{[0-9a-zA-Z_.]+}}) - - simd a(0.1f); - simd b = __esimd_rdregion(a.data(), 0); - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <8 x float> @llvm.genx.rdregionf.v8f32.v16f32.i16(<16 x float> %{{[0-9a-zA-Z_.]+}}, i32 0, i32 8, i32 1, i16 0, i32 0) - - simd c(0.0f); - - using PH = sycl::access::placeholder; - - sycl::accessor, 2, - sycl::access::mode::read, sycl::access::target::image, - PH::false_t> - pA; - sycl::accessor, 2, - sycl::access::mode::write, sycl::access::target::image, - PH::false_t> - pB; - - auto d = __esimd_wrregion( - c.data() /*dst*/, b.data() /*src*/, 0 /*offset*/); - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <16 x float> @llvm.genx.wrregionf.v16f32.v8f32.i16.v8i1(<16 x float> %{{[0-9a-zA-Z_.]+}}, <8 x float> %{{[0-9a-zA-Z_.]+}}, i32 0, i32 8, i32 1, i16 0, i32 0, <8 x i1> ) - - simd va; - va = media_block_load(pA, x, y); - // CHECK: %[[SI0_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %{{[0-9a-zA-Z_.]+}}) - // CHECK: store i32 %[[SI0_VAL]], ptr addrspace(4) %[[SI0_ADDR:[0-9a-zA-Z_.]+]] - // CHECK: %[[SI0:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI0_ADDR]] - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.media.ld.v32i32(i32 0, i32 %[[SI0]], i32 0, i32 32, i32 %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}) - - simd vb = va + 1; - media_block_store(pB, x, y, vb); - // CHECK: %[[SI2_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %{{[0-9a-zA-Z_.]+}}) - // CHECK: store i32 %[[SI2_VAL]], ptr addrspace(4) %[[SI2_ADDR:[0-9a-zA-Z_.]+]] - // CHECK: %[[SI2:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI2_ADDR]] - // CHECK: call void @llvm.genx.media.st.v32i32(i32 0, i32 %[[SI2]], i32 0, i32 32, i32 %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}) - - auto ee = __esimd_vload((detail::vector_type_t *)(&vg)); - // CHECK: %{{[0-9a-zA-Z_.]+}} = call <16 x i32> @llvm.genx.vload.v16i32.p0(ptr {{.*}}) - __esimd_vstore(&vc, va.data()); - // CHECK: store <32 x i32> %{{[0-9a-zA-Z_.]+}}, ptr addrspace(4) {{.*}} - - { - sycl::accessor - acc; - simd offsets = 1; - simd_mask<8> pred({1, 0, 1, 0, 1, 0, 1, 0}); - - // 4-byte element gather - simd v = gather(acc, offsets, 100); - // CHECK-STATEFUL: %[[SI3_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}}) - // CHECK-STATEFUL: store i32 %[[SI3_VAL]], ptr addrspace(4) %[[SI3_ADDR:[0-9a-zA-Z_.]+]] - // CHECK-STATEFUL: %[[SI3:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI3_ADDR]] - // CHECK-STATEFUL: call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 2, i16 0, i32 %[[SI3]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i1> %{{[0-9a-zA-Z_.]+}}) - // CHECK-STATELESS: call <8 x i32> @llvm.genx.svm.gather.v8i32.v8i1.v8i64(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <8 x i32> undef) - - // 4-byte element scatter - scatter(acc, offsets, v, 100, pred); - // CHECK-STATEFUL: %[[SI4_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}}) - // CHECK-STATEFUL: store i32 %[[SI4_VAL]], ptr addrspace(4) %[[SI4_ADDR:[0-9a-zA-Z_.]+]] - // CHECK-STATEFUL: %[[SI4:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI4_ADDR]] - // CHECK-STATEFUL: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 2, i16 0, i32 %[[SI4]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}) - // CHECK-STATELESS: call void @llvm.genx.svm.scatter.v8i1.v8i64.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}) - - // 1-byte element gather - simd v1 = gather(acc, offsets, 100); - // CHECK-STATEFUL: %[[SI5_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}}) - // CHECK-STATEFUL: store i32 %[[SI5_VAL]], ptr addrspace(4) %[[SI5_ADDR:[0-9a-zA-Z_.]+]] - // CHECK-STATEFUL: %[[SI5:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI5_ADDR]] - // CHECK-STATEFUL: call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 0, i16 0, i32 %[[SI5]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i1> %{{[0-9a-zA-Z_.]+}}) - // CHECK-STATELESS: call <32 x i8> @llvm.genx.svm.gather.v32i8.v8i1.v8i64(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i8> undef) - - // 1-byte element scatter - scatter(acc, offsets, v1, 100, pred); - // CHECK-STATEFUL: %[[SI6_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}}) - // CHECK-STATEFUL: store i32 %[[SI6_VAL]], ptr addrspace(4) %[[SI6_ADDR:[0-9a-zA-Z_.]+]] - // CHECK-STATEFUL: %[[SI6:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI6_ADDR]] - // CHECK-STATEFUL: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, i16 0, i32 %[[SI6]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}) - // CHECK-STATELESS: call void @llvm.genx.svm.scatter.v8i1.v8i64.v32i8(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i8> %{{[0-9a-zA-Z_.]+}}) - } - __esimd_fence(fence_mask::global_coherent_fence); - // CHECK: call void @llvm.genx.fence(i8 1) - __esimd_fence(fence_mask::l3_flush_instructions); - // CHECK: call void @llvm.genx.fence(i8 2) - __esimd_fence(fence_mask::l3_flush_texture_data); - // CHECK: call void @llvm.genx.fence(i8 4) - __esimd_fence(fence_mask::l3_flush_constant_data); - // CHECK: call void @llvm.genx.fence(i8 8) - __esimd_fence(fence_mask::l3_flush_rw_data); - // CHECK: call void @llvm.genx.fence(i8 16) - __esimd_fence(fence_mask::local_barrier); - // CHECK: call void @llvm.genx.fence(i8 32) - __esimd_fence(fence_mask::l1_flush_ro_data); - // CHECK: call void @llvm.genx.fence(i8 64) - __esimd_fence(fence_mask::sw_barrier); - // CHECK: call void @llvm.genx.fence(i8 -128) - - return d; -} diff --git a/sycl/test/esimd/slm_init_specconst_size.cpp b/sycl/test/esimd/slm_init_specconst_size.cpp index 81a18fd15ba87..39ecacb2fa663 100644 --- a/sycl/test/esimd/slm_init_specconst_size.cpp +++ b/sycl/test/esimd/slm_init_specconst_size.cpp @@ -1,7 +1,4 @@ -// RUN: %clangxx -O2 -fsycl -fsycl-device-only -Xclang -no-opaque-pointers -emit-llvm %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -O2 -S %t -o %t.table -// RUN: FileCheck %s -input-file=%t_esimd_0.ll -// RUN: %clangxx -O2 -fsycl -fsycl-device-only -Xclang -opaque-pointers -emit-llvm %s -o %t +// RUN: %clangxx -O2 -fsycl -fsycl-device-only -emit-llvm %s -o %t // RUN: sycl-post-link -split-esimd -lower-esimd -O2 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll // Checks that we set 0 as VCSLMSize when slm_init is used with diff --git a/sycl/test/esimd/sycl_half_basic_ops.cpp b/sycl/test/esimd/sycl_half_basic_ops.cpp index ea8db1ac596c2..ae357936a6f54 100644 --- a/sycl/test/esimd/sycl_half_basic_ops.cpp +++ b/sycl/test/esimd/sycl_half_basic_ops.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -Xclang -no-opaque-pointers -fsycl -fsycl-device-only -S %s -o %t +// RUN: %clangxx -fsycl -fsycl-device-only -S %s -o %t // RUN: sycl-post-link -split-esimd -lower-esimd -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll @@ -15,71 +15,49 @@ using namespace sycl; // clang-format off // --- Unary operation SYCL_EXTERNAL auto test_unary_op(simd val) SYCL_ESIMD_FUNCTION { -// CHECK: define dso_local spir_func void @_Z13test_unary_op{{.*}}( -// CHECK: {{.*}} %[[RET_VEC_ADDR:[a-zA-Z0-9_\.]+]], -// CHECK: {{.*}} %[[VAL_PTR:[a-zA-Z0-9_\.]+]]){{.*}} { +// CHECK: define dso_local spir_func <8 x half> @_Z13test_unary_op{{.*}}( +// CHECK: {{.*}} %[[VAL_VEC:[a-zA-Z0-9_\.]+]]){{.*}} { return -val; -// CHECK: %[[VAL_VEC_ADDR:[a-zA-Z0-9_\.]+]] = addrspacecast {{.*}} %[[VAL_PTR]] -// CHECK-NEXT: %[[VAL_VEC:[a-zA-Z0-9_\.]+]] = load <8 x half>{{.*}} %[[VAL_VEC_ADDR]] -// CHECK-NEXT: %[[RES:[a-zA-Z0-9_\.]+]] = fneg <8 x half> %[[VAL_VEC]] -// CHECK-NEXT: store <8 x half>{{.*}}%[[RES]], {{.*}}%[[RET_VEC_ADDR]] -// CHECK-NEXT: ret void +// CHECK: %[[RES:[a-zA -Z0-9_\.]+]] = fneg <8 x half> %[[VAL_VEC]] +// CHECK-NEXT: ret <8 x half> %[[RES]] // CHECK-LABEL: } } // --- Binary operation on pair SYCL_EXTERNAL auto test_binary_op1(simd val1, simd val2) SYCL_ESIMD_FUNCTION { -// CHECK: define dso_local spir_func void @_Z15test_binary_op1{{.*}}( -// CHECK: {{[^,]*}} %[[RET_VEC_ADDR:[a-zA-Z0-9_\.]+]], -// CHECK: {{[^,]*}} %[[VAL1_PTR:[a-zA-Z0-9_\.]+]], -// CHECK: {{.*}} %[[VAL2_PTR:[a-zA-Z0-9_\.]+]]){{.*}} { +// CHECK: define dso_local spir_func <8 x half> @_Z15test_binary_op1{{.*}}( +// CHECK: {{[^,]*}} %[[VAL1_VEC:[a-zA-Z0-9_\.]+]], +// CHECK: %[[VAL2_VEC:[a-zA-Z0-9_\.]+]]{{.*}} { return val1 + val2; -// CHECK: %[[VAL1_VEC_ADDR:[a-zA-Z0-9_\.]+]] = addrspacecast {{.*}} %[[VAL1_PTR]] -// CHECK-NEXT: %[[VAL1_VEC:[a-zA-Z0-9_\.]+]] = load <8 x half>{{.*}} %[[VAL1_VEC_ADDR]] -// CHECK-NEXT: %[[VAL2_VEC_ADDR:[a-zA-Z0-9_\.]+]] = addrspacecast {{.*}} %[[VAL2_PTR]] -// CHECK-NEXT: %[[VAL2_VEC:[a-zA-Z0-9_\.]+]] = load <8 x half>{{.*}} %[[VAL2_VEC_ADDR]] -// CHECK-NEXT: %[[RES:[a-zA-Z0-9_\.]+]] = fadd <8 x half> %[[VAL1_VEC]], %[[VAL2_VEC]] -// CHECK-NEXT: store <8 x half>{{.*}}%[[RES]], {{.*}}%[[RET_VEC_ADDR]] -// CHECK-NEXT: ret void +// CHECK: %[[RES:[a-zA -Z0-9_\.]+]] = fadd <8 x half> %[[VAL1_VEC]], %[[VAL2_VEC]] +// CHECK-NEXT: ret <8 x half> %[[RES]] // CHECK-LABEL: } } // --- Binary operation on pair // The integer operand is expected to be converted to half type. SYCL_EXTERNAL auto test_binary_op2(simd val1, simd val2) SYCL_ESIMD_FUNCTION { -// CHECK: define dso_local spir_func void @_Z15test_binary_op2{{[^\(]*}}( -// CHECK: <8 x half>{{[^,]*}}* %[[RET_VEC_ADDR:[a-zA-Z0-9_\.]+]], -// CHECK: <8 x half>* %[[VAL1_PTR:[a-zA-Z0-9_\.]+]], -// CHECK: <8 x i64>* %[[VAL2_PTR:[a-zA-Z0-9_\.]+]]){{.*}} { +// CHECK: define dso_local spir_func <8 x half> @_Z15test_binary_op2{{[^\(]*}}( +// CHECK: <8 x half> %[[VAL1_VEC:[a-zA-Z0-9_\.]+]], +// CHECK: <8 x i64> %[[VAL2_VEC:[a-zA-Z0-9_\.]+]]{{.*}} { return val1 + val2; -// CHECK: %[[VAL1_VEC_ADDR:[a-zA-Z0-9_\.]+]] = addrspacecast {{.*}} %[[VAL1_PTR]] -// CHECK-NEXT: %[[VAL1_VEC:[a-zA-Z0-9_\.]+]] = load <8 x half>{{.*}} %[[VAL1_VEC_ADDR]] -// CHECK-NEXT: %[[VAL2_VEC_ADDR:[a-zA-Z0-9_\.]+]] = addrspacecast {{.*}} %[[VAL2_PTR]] -// CHECK-NEXT: %[[VAL2_VEC:[a-zA-Z0-9_\.]+]] = load <8 x i64>{{.*}} %[[VAL2_VEC_ADDR]] -// CHECK-NEXT: %[[CONV:[a-zA-Z0-9_\.]+]] = sitofp <8 x i64> %[[VAL2_VEC]] to <8 x half> -// CHECK-NEXT: %[[RES:[a-zA-Z0-9_\.]+]] = fadd <8 x half> %[[VAL1_VEC]], %[[CONV]] -// CHECK-NEXT: store <8 x half>{{.*}}%[[RES]], {{.*}}%[[RET_VEC_ADDR]] -// CHECK-NEXT: ret void +// CHECK: %[[CONV:[a-zA-Z0-9_\.]+]] = sitofp <8 x i64> %[[VAL2_VEC]] to <8 x half> +// CHECK-NEXT: %[[RES:[a-zA-Z0-9_\.]+]] = fadd <8 x half> %[[CONV]], %[[VAL1_VEC]] +// CHECK-NEXT: ret <8 x half> %[[RES]] // CHECK-LABEL: } } // --- Comparison operation on pair // The integer operand is expected to be converted to half type. SYCL_EXTERNAL auto test_cmp_op(simd val1, simd val2) SYCL_ESIMD_FUNCTION { -// CHECK: define dso_local spir_func void @_Z11test_cmp_op{{[^\(]*}}( -// CHECK: <8 x i16>{{[^,]*}}* %[[RET_VEC_ADDR:[a-zA-Z0-9_\.]+]], -// CHECK: <8 x half>* %[[VAL1_PTR:[a-zA-Z0-9_\.]+]], -// CHECK: <8 x i64>* %[[VAL2_PTR:[a-zA-Z0-9_\.]+]]){{.*}} { +// CHECK: define dso_local spir_func <8 x i16> @_Z11test_cmp_op{{[^\(]*}}( +// CHECK: <8 x half> %[[VAL1_VEC:[a-zA-Z0-9_\.]+]], +// CHECK: <8 x i64> %[[VAL2_VEC:[a-zA-Z0-9_\.]+]]{{.*}} { return val1 < val2; -// CHECK: %[[VAL1_VEC_ADDR:[a-zA-Z0-9_\.]+]] = addrspacecast {{.*}} %[[VAL1_PTR]] -// CHECK-NEXT: %[[VAL1_VEC:[a-zA-Z0-9_\.]+]] = load <8 x half>{{.*}} %[[VAL1_VEC_ADDR]] -// CHECK-NEXT: %[[VAL2_VEC_ADDR:[a-zA-Z0-9_\.]+]] = addrspacecast {{.*}} %[[VAL2_PTR]] -// CHECK-NEXT: %[[VAL2_VEC:[a-zA-Z0-9_\.]+]] = load <8 x i64>{{.*}} %[[VAL2_VEC_ADDR]] -// CHECK-NEXT: %[[CONV:[a-zA-Z0-9_\.]+]] = sitofp <8 x i64> %[[VAL2_VEC]] to <8 x half> -// CHECK-NEXT: %[[CMP:[a-zA-Z0-9_\.]+]] = fcmp olt <8 x half> %[[VAL1_VEC]], %[[CONV]] +// CHECK: %[[CONV:[a-zA-Z0-9_\.]+]] = sitofp <8 x i64> %[[VAL2_VEC]] to <8 x half> +// CHECK-NEXT: %[[CMP:[a-zA-Z0-9_\.]+]] = fcmp ogt <8 x half> %[[CONV]], %[[VAL1_VEC]] // CHECK-NEXT: %[[RES:[a-zA-Z0-9_\.]+]] = zext <8 x i1> %[[CMP]] to <8 x i16> -// CHECK-NEXT: store <8 x i16>{{.*}}%[[RES]], {{.*}}%[[RET_VEC_ADDR]] -// CHECK-NEXT: ret void +// CHECK-NEXT: ret <8 x i16>{{.*}}%[[RES]] // CHECK-LABEL: } } // clang-format on diff --git a/sycl/test/esimd/sycl_half_basic_ops_opaque.cpp b/sycl/test/esimd/sycl_half_basic_ops_opaque.cpp deleted file mode 100644 index 444763f5aaf98..0000000000000 --- a/sycl/test/esimd/sycl_half_basic_ops_opaque.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// RUN: %clangxx -Xclang -opaque-pointers -fsycl -fsycl-device-only -S %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -S %t -o %t.table -// RUN: FileCheck %s -input-file=%t_esimd_0.ll - -// The test checks that there are no unexpected extra conversions or intrinsic -// calls added by the API headers or compiler when generating code -// for basic C++ operations on simd values. - -#include - -using namespace sycl::ext::intel::esimd; -using namespace sycl::ext::intel; -using namespace sycl; - -// clang-format off -// --- Unary operation -SYCL_EXTERNAL auto test_unary_op(simd val) SYCL_ESIMD_FUNCTION { -// CHECK: define dso_local spir_func <8 x half> @_Z13test_unary_op{{.*}}( -// CHECK: {{.*}} %[[VAL_VEC:[a-zA-Z0-9_\.]+]]){{.*}} { - return -val; -// CHECK: %[[RES:[a-zA -Z0-9_\.]+]] = fneg <8 x half> %[[VAL_VEC]] -// CHECK-NEXT: ret <8 x half> %[[RES]] -// CHECK-LABEL: } -} - -// --- Binary operation on pair -SYCL_EXTERNAL auto test_binary_op1(simd val1, simd val2) SYCL_ESIMD_FUNCTION { -// CHECK: define dso_local spir_func <8 x half> @_Z15test_binary_op1{{.*}}( -// CHECK: {{[^,]*}} %[[VAL1_VEC:[a-zA-Z0-9_\.]+]], -// CHECK: %[[VAL2_VEC:[a-zA-Z0-9_\.]+]]{{.*}} { - return val1 + val2; -// CHECK: %[[RES:[a-zA -Z0-9_\.]+]] = fadd <8 x half> %[[VAL1_VEC]], %[[VAL2_VEC]] -// CHECK-NEXT: ret <8 x half> %[[RES]] -// CHECK-LABEL: } -} - -// --- Binary operation on pair -// The integer operand is expected to be converted to half type. -SYCL_EXTERNAL auto test_binary_op2(simd val1, simd val2) SYCL_ESIMD_FUNCTION { -// CHECK: define dso_local spir_func <8 x half> @_Z15test_binary_op2{{[^\(]*}}( -// CHECK: <8 x half> %[[VAL1_VEC:[a-zA-Z0-9_\.]+]], -// CHECK: <8 x i64> %[[VAL2_VEC:[a-zA-Z0-9_\.]+]]{{.*}} { - return val1 + val2; -// CHECK: %[[CONV:[a-zA-Z0-9_\.]+]] = sitofp <8 x i64> %[[VAL2_VEC]] to <8 x half> -// CHECK-NEXT: %[[RES:[a-zA-Z0-9_\.]+]] = fadd <8 x half> %[[CONV]], %[[VAL1_VEC]] -// CHECK-NEXT: ret <8 x half> %[[RES]] -// CHECK-LABEL: } -} - -// --- Comparison operation on pair -// The integer operand is expected to be converted to half type. -SYCL_EXTERNAL auto test_cmp_op(simd val1, simd val2) SYCL_ESIMD_FUNCTION { -// CHECK: define dso_local spir_func <8 x i16> @_Z11test_cmp_op{{[^\(]*}}( -// CHECK: <8 x half> %[[VAL1_VEC:[a-zA-Z0-9_\.]+]], -// CHECK: <8 x i64> %[[VAL2_VEC:[a-zA-Z0-9_\.]+]]{{.*}} { - return val1 < val2; -// CHECK: %[[CONV:[a-zA-Z0-9_\.]+]] = sitofp <8 x i64> %[[VAL2_VEC]] to <8 x half> -// CHECK-NEXT: %[[CMP:[a-zA-Z0-9_\.]+]] = fcmp ogt <8 x half> %[[CONV]], %[[VAL1_VEC]] -// CHECK-NEXT: %[[RES:[a-zA-Z0-9_\.]+]] = zext <8 x i1> %[[CMP]] to <8 x i16> -// CHECK-NEXT: ret <8 x i16>{{.*}}%[[RES]] -// CHECK-LABEL: } -} -// clang-format on diff --git a/sycl/test/esimd/sycl_half_math_ops.cpp b/sycl/test/esimd/sycl_half_math_ops.cpp index e303c4c97edc9..6bfa67b85f741 100644 --- a/sycl/test/esimd/sycl_half_math_ops.cpp +++ b/sycl/test/esimd/sycl_half_math_ops.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -Xclang -no-opaque-pointers -fsycl -fsycl-device-only -S %s -o %t.ll +// RUN: %clangxx -fsycl -fsycl-device-only -S %s -o %t.ll // RUN: sycl-post-link -split-esimd -lower-esimd -S %t.ll -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll @@ -14,15 +14,11 @@ using namespace sycl; // clang-format off SYCL_EXTERNAL auto test_ext_math_op(simd val) SYCL_ESIMD_FUNCTION { -// CHECK: define dso_local spir_func void @_Z16test_ext_math_op{{[^\(]*}}( -// CHECK: <8 x half>{{[^,]*}}* %[[RET_VEC_ADDR:[a-zA-Z0-9_\.]+]], -// CHECK: <8 x half>* %[[VAL_PTR:[a-zA-Z0-9_\.]+]]){{.*}} { +// CHECK: define dso_local spir_func <8 x half> @_Z16test_ext_math_op{{[^\(]*}}( +// CHECK: <8 x half> %[[VAL_VEC:[a-zA-Z0-9_\.]+]]){{.*}} { return esimd::cos(val); -// CHECK: %[[VAL_VEC_ADDR:[a-zA-Z0-9_\.]+]] = addrspacecast {{.*}} %[[VAL_PTR]] -// CHECK-NEXT: %[[VAL_VEC:[a-zA-Z0-9_\.]+]] = load <8 x half>{{.*}} %[[VAL_VEC_ADDR]] -// CHECK-NEXT: %[[RES:[a-zA-Z0-9_\.]+]] = call <8 x half> @llvm.genx.cos.v8f16(<8 x half> %[[VAL_VEC]]) -// CHECK-NEXT: store <8 x half>{{.*}}%[[RES]], {{.*}}%[[RET_VEC_ADDR]] -// CHECK-NEXT: ret void +// CHECK: %[[RES:[a-zA-Z0-9_\.]+]] = call <8 x half> @llvm.genx.cos.v8f16(<8 x half> %[[VAL_VEC]]) +// CHECK-NEXT: ret <8 x half> %[[RES]] // CHECK-LABEL: } } // clang-format on diff --git a/sycl/test/esimd/sycl_half_math_ops_opaque.cpp b/sycl/test/esimd/sycl_half_math_ops_opaque.cpp deleted file mode 100644 index df1d20bf55694..0000000000000 --- a/sycl/test/esimd/sycl_half_math_ops_opaque.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// RUN: %clangxx -Xclang -opaque-pointers -fsycl -fsycl-device-only -S %s -o %t.ll -// RUN: sycl-post-link -split-esimd -lower-esimd -S %t.ll -o %t.table -// RUN: FileCheck %s -input-file=%t_esimd_0.ll - -// The test checks that there are no unexpected extra conversions or intrinsic -// calls added by the API headers or compiler when generating code -// for math operations on simd values. - -#include - -using namespace sycl::ext::intel::esimd; -using namespace sycl::ext::intel; -using namespace sycl; - -// clang-format off -SYCL_EXTERNAL auto test_ext_math_op(simd val) SYCL_ESIMD_FUNCTION { -// CHECK: define dso_local spir_func <8 x half> @_Z16test_ext_math_op{{[^\(]*}}( -// CHECK: <8 x half> %[[VAL_VEC:[a-zA-Z0-9_\.]+]]){{.*}} { - return esimd::cos(val); -// CHECK: %[[RES:[a-zA-Z0-9_\.]+]] = call <8 x half> @llvm.genx.cos.v8f16(<8 x half> %[[VAL_VEC]]) -// CHECK-NEXT: ret <8 x half> %[[RES]] -// CHECK-LABEL: } -} -// clang-format on diff --git a/sycl/test/esimd/vec_arg_call_conv_ext.cpp b/sycl/test/esimd/vec_arg_call_conv_ext.cpp index a4cdc137b7192..c662b64bb2720 100644 --- a/sycl/test/esimd/vec_arg_call_conv_ext.cpp +++ b/sycl/test/esimd/vec_arg_call_conv_ext.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -Xclang -opaque-pointers -fsycl-device-only -Xclang -emit-llvm -o %t.comp.ll %s +// RUN: %clangxx -fsycl -fsycl-device-only -Xclang -emit-llvm -o %t.comp.ll %s // RUN: sycl-post-link -ir-output-only -lower-esimd -S %t.comp.ll -o %t.out.ll // RUN: FileCheck --input-file=%t.out.ll %s diff --git a/sycl/test/esimd/vec_arg_call_conv_smoke.cpp b/sycl/test/esimd/vec_arg_call_conv_smoke.cpp index 8f1f184842a4b..1e863232dd4d3 100644 --- a/sycl/test/esimd/vec_arg_call_conv_smoke.cpp +++ b/sycl/test/esimd/vec_arg_call_conv_smoke.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -Xclang -opaque-pointers -fsycl-device-only -Xclang -emit-llvm -o %t.comp.ll %s +// RUN: %clangxx -fsycl -fsycl-device-only -Xclang -emit-llvm -o %t.comp.ll %s // RUN: sycl-post-link -ir-output-only -lower-esimd -S %t.comp.ll -o %t.out.ll // RUN: FileCheck --input-file=%t.out.ll %s