From db1d2f6a15e60de498870b807d233187306a58fb Mon Sep 17 00:00:00 2001 From: Vyacheslav N Klochkov Date: Fri, 1 Oct 2021 06:51:53 -0700 Subject: [PATCH 1/6] [ESIMD] Re-work loads from globals in sycl-post-link 1) The re-work in the lowering of loads from globals was required because the previous implementation did not allow handling the loads from scalar globals. 2) The previous implementation generated duplicated vector loads for each of users. Fixed it here. 3) Added lowering for __spirv_BuiltInSubgroupLocalInvocationId(), which must always return 0 for ESIMD. Signed-off-by: Vyacheslav N Klochkov --- llvm/lib/SYCLLowerIR/LowerESIMD.cpp | 284 ++++++++++++------------ sycl/test/esimd/spirv_intrins_trans.cpp | 29 ++- 2 files changed, 165 insertions(+), 148 deletions(-) diff --git a/llvm/lib/SYCLLowerIR/LowerESIMD.cpp b/llvm/lib/SYCLLowerIR/LowerESIMD.cpp index 708b5424a2d9d..147dd9657b249 100644 --- a/llvm/lib/SYCLLowerIR/LowerESIMD.cpp +++ b/llvm/lib/SYCLLowerIR/LowerESIMD.cpp @@ -845,145 +845,170 @@ static Instruction *addCastInstIfNeeded(Instruction *OldI, Instruction *NewI) { auto CastOpcode = CastInst::getCastOpcode(NewI, false, OITy, false); NewI = CastInst::Create(CastOpcode, NewI, OITy, NewI->getName() + ".cast.ty", OldI); + NewI->setDebugLoc(OldI->getDebugLoc()); } return NewI; } -static int getIndexForSuffix(StringRef Suff) { - return llvm::StringSwitch(Suff) - .Case("x", 0) - .Case("y", 1) - .Case("z", 2) - .Default(-1); -} - -// Helper function to convert extractelement instruction associated with the -// load from SPIRV builtin global, into the GenX intrinsic that returns vector -// of coordinates. It also generates required extractelement and cast -// instructions. Example: -// %0 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast -// (<3 x i64> addrspace(1)* @__spirv_BuiltInLocalInvocationId -// to <3 x i64> addrspace(4)*), align 32 -// %1 = extractelement <3 x i64> %0, i64 0 -// -// => -// -// %.esimd = call <3 x i32> @llvm.genx.local.id.v3i32() -// %local_id.x = extractelement <3 x i32> %.esimd, i32 0 -// %local_id.x.cast.ty = zext i32 %local_id.x to i64 -static Instruction *generateVectorGenXForSpirv(ExtractElementInst *EEI, - StringRef Suff, - const std::string &IntrinName, - StringRef ValueName) { +/// Generates the call of GenX intrinsic \p IntrinName and inserts it +/// right before the given instruction \p LI. If the parameter \p IsVectorCall +/// is set to true, then the GenX intrinsic returns a vector of 3 32-bit +/// integers. +static Instruction *generateGenXCall(Instruction *LI, StringRef IntrinName, + bool IsVectorCall) { std::string IntrName = - std::string(GenXIntrinsic::getGenXIntrinsicPrefix()) + IntrinName; - auto ID = GenXIntrinsic::lookupGenXIntrinsicID(IntrName); - LLVMContext &Ctx = EEI->getModule()->getContext(); - Type *I32Ty = Type::getInt32Ty(Ctx); - Function *NewFDecl = GenXIntrinsic::getGenXDeclaration( - EEI->getModule(), ID, {FixedVectorType::get(I32Ty, 3)}); - Instruction *IntrI = - IntrinsicInst::Create(NewFDecl, {}, EEI->getName() + ".esimd", EEI); - int ExtractIndex = getIndexForSuffix(Suff); - assert(ExtractIndex != -1 && "Extract index is invalid."); - Twine ExtractName = ValueName + Suff; - - Instruction *ExtrI = ExtractElementInst::Create( - IntrI, ConstantInt::get(I32Ty, ExtractIndex), ExtractName, EEI); - Instruction *CastI = addCastInstIfNeeded(EEI, ExtrI); - if (EEI->getDebugLoc()) { - IntrI->setDebugLoc(EEI->getDebugLoc()); - ExtrI->setDebugLoc(EEI->getDebugLoc()); - // It's OK if ExtrI and CastI is the same instruction - CastI->setDebugLoc(EEI->getDebugLoc()); - } - return CastI; -} - -// Helper function to convert extractelement instruction associated with the -// load from SPIRV builtin global, into the GenX intrinsic. It also generates -// required cast instructions. Example: -// %0 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> -// addrspace(1)* @__spirv_BuiltInWorkgroupId to <3 x i64> addrspace(4)*), align -// 32 %1 = extractelement <3 x i64> %0, i64 0 -// => -// %0 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> -// addrspace(1)* @__spirv_BuiltInWorkgroupId to <3 x i64> addrspace(4)*), align -// 32 %group.id.x = call i32 @llvm.genx.group.id.x() %group.id.x.cast.ty = zext -// i32 %group.id.x to i64 -static Instruction *generateGenXForSpirv(ExtractElementInst *EEI, - StringRef Suff, - const std::string &IntrinName) { - std::string IntrName = std::string(GenXIntrinsic::getGenXIntrinsicPrefix()) + - IntrinName + Suff.str(); + (Twine(GenXIntrinsic::getGenXIntrinsicPrefix()) + Twine(IntrinName)) + .str(); auto ID = GenXIntrinsic::lookupGenXIntrinsicID(IntrName); + Type *I32Ty = Type::getInt32Ty(LI->getModule()->getContext()); Function *NewFDecl = - GenXIntrinsic::getGenXDeclaration(EEI->getModule(), ID, {}); - - Instruction *IntrI = - IntrinsicInst::Create(NewFDecl, {}, IntrinName + Suff.str(), EEI); - Instruction *CastI = addCastInstIfNeeded(EEI, IntrI); - if (EEI->getDebugLoc()) { - IntrI->setDebugLoc(EEI->getDebugLoc()); - // It's OK if IntrI and CastI is the same instruction - CastI->setDebugLoc(EEI->getDebugLoc()); - } - return CastI; + IsVectorCall ? GenXIntrinsic::getGenXDeclaration( + LI->getModule(), ID, FixedVectorType::get(I32Ty, 3)) + : GenXIntrinsic::getGenXDeclaration(LI->getModule(), ID); + + std::string ResultName = + (Twine(LI->getNameOrAsOperand()) + "." + IntrName).str(); + auto *Inst = IntrinsicInst::Create(NewFDecl, {}, ResultName, LI); + Inst->setDebugLoc(LI->getDebugLoc()); + return Inst; } -// This function translates one occurence of SPIRV builtin use into GenX -// intrinsic. -static Value *translateSpirvGlobalUse(ExtractElementInst *EEI, - StringRef SpirvGlobalName) { +/// Returns the index from the given extract element instruction \p EEI. +/// It is checked here that the index is either 0, 1, or 2. +static uint64_t getIndexFromExtract(ExtractElementInst *EEI) { Value *IndexV = EEI->getIndexOperand(); assert(isa(IndexV) && - "Extract element index should be a constant"); + "Expected a const index in extract element instruction"); + uint64_t IndexValue = cast(IndexV)->getZExtValue(); + assert(IndexValue <= 2 && + "Extract element index should be either 0, 1, or 2"); + return IndexValue; +} - // Get the suffix based on the index of extractelement instruction - ConstantInt *IndexC = cast(IndexV); - std::string Suff; - if (IndexC->equalsInt(0)) - Suff = 'x'; - else if (IndexC->equalsInt(1)) - Suff = 'y'; - else if (IndexC->equalsInt(2)) - Suff = 'z'; - else - assert(false && "Extract element index should be either 0, 1, or 2"); - - // Translate SPIRV into GenX intrinsic. +/// Extracts the index from the given extract element instruction \p EEI, +/// and attaches the corresponding suffix (either "x", "y", or "z") to the given +/// \p Name. +static StringRef addGenXSuffix(StringRef Name, ExtractElementInst *EEI) { + uint64_t IndexValue = getIndexFromExtract(EEI); + return Twine(Name + Twine(static_cast('x' + IndexValue))).str(); +} + +/// Generates extractelement instruction associated with GenX intrinsic +/// returning a vector. It also generates required cast instructions. +/// The new instructions are insrted before the given extract element +/// instruction \p EEI. The index for the new extract element is copied +/// from the old extract element instruction. The instruction \p OpndI +/// specifies the variable from which the element must be extracted. +static Instruction *genExtractAndCast(Instruction *OpndI, + ExtractElementInst *EEI) { + uint64_t IndexValue = getIndexFromExtract(EEI); + + Type *I32Ty = Type::getInt32Ty(EEI->getModule()->getContext()); + std::string ExtractName = + (Twine(EEI->getNameOrAsOperand()) + ".ext." + Twine(IndexValue)).str(); + Instruction *ExtrI = ExtractElementInst::Create( + OpndI, ConstantInt::get(I32Ty, IndexValue), ExtractName, EEI); + ExtrI->setDebugLoc(EEI->getDebugLoc()); + + return addCastInstIfNeeded(EEI, ExtrI); +} + +/// Replaces the load \p LI of SPIRV global with corresponding call(s) of GenX +/// intrinsic(s). The users of LI may also be transformed if needed for def/use +/// type correctness. +/// The replaced instructions are stored into the given container +/// \p InstsToErase. +static void +translateSpirvGlobalUse(LoadInst *LI, StringRef SpirvGlobalName, + SmallVectorImpl &InstsToErase) { + // Translate the original load from SPIRV global first. + // Generate either a) some calls of vector GenX intrinsics to replace + // the original vector load + extracts, or b) one scalar call replacing + // the original scalar call here and translate/convert the users of the + // SPIRV global later. + SmallVector VCall; + Value *SCall = nullptr; if (SpirvGlobalName == "WorkgroupSize") { - return generateVectorGenXForSpirv(EEI, Suff, "local.size.v3i32", "wgsize."); + VCall.push_back(generateGenXCall(LI, "local.size.v3i32", true)); } else if (SpirvGlobalName == "LocalInvocationId") { - return generateVectorGenXForSpirv(EEI, Suff, "local.id.v3i32", "local_id."); - } else if (SpirvGlobalName == "WorkgroupId") { - return generateGenXForSpirv(EEI, Suff, "group.id."); + VCall.push_back(generateGenXCall(LI, "local.id.v3i32", true)); + } else if (SpirvGlobalName == "NumWorkgroups") { + VCall.push_back(generateGenXCall(LI, "group.count.v3i32", true)); } else if (SpirvGlobalName == "GlobalInvocationId") { - // GlobalId = LocalId + WorkGroupSize * GroupId - Instruction *LocalIdI = - generateVectorGenXForSpirv(EEI, Suff, "local.id.v3i32", "local_id."); - Instruction *WGSizeI = - generateVectorGenXForSpirv(EEI, Suff, "local.size.v3i32", "wgsize."); - Instruction *GroupIdI = generateGenXForSpirv(EEI, Suff, "group.id."); - Instruction *MulI = - BinaryOperator::CreateMul(WGSizeI, GroupIdI, "mul", EEI); - return BinaryOperator::CreateAdd(LocalIdI, MulI, "add", EEI); + // Special case: GlobalId = LocalId + WorkGroupSize * GroupId + // Call LocalId and WorkgroupSize here to replace the original load + // and do the rest of computations when lower the uses. + VCall.push_back(generateGenXCall(LI, "local.id.v3i32", true)); + VCall.push_back(generateGenXCall(LI, "local.size.v3i32", true)); } else if (SpirvGlobalName == "GlobalSize") { - // GlobalSize = WorkGroupSize * NumWorkGroups - Instruction *WGSizeI = - generateVectorGenXForSpirv(EEI, Suff, "local.size.v3i32", "wgsize."); - Instruction *NumWGI = generateVectorGenXForSpirv( - EEI, Suff, "group.count.v3i32", "group_count."); - return BinaryOperator::CreateMul(WGSizeI, NumWGI, "mul", EEI); + // Special case: GlobalSize = WorkGroupSize * NumWorkGroups + VCall.push_back(generateGenXCall(LI, "local.size.v3i32", true)); + VCall.push_back(generateGenXCall(LI, "group.count.v3i32", true)); + } else if (SpirvGlobalName == "WorkgroupId") { + // GenX does not provide a vector intrinsic for this vector global. + // So, proceed to lowering of users of the load and generate specialized + // version of GenX intrinsic, e.g. group.id.x or group.id.y + } else if (SpirvGlobalName == "SubgroupLocalInvocationId") { + // Subgroup local id always returns 0 for ESIMD. + SCall = llvm::Constant::getNullValue(LI->getType()); } else if (SpirvGlobalName == "GlobalOffset") { // TODO: Support GlobalOffset SPIRV intrinsics - return llvm::Constant::getNullValue(EEI->getType()); - } else if (SpirvGlobalName == "NumWorkgroups") { - return generateVectorGenXForSpirv(EEI, Suff, "group.count.v3i32", - "group_count."); + // Currently, GlobalOffset always returns 0. + // Just proceed to lowering of users of load from GlobalOffset global. } - return nullptr; + // TODO: Also, implement support for the following intrinsics: + // uint32_t __spirv_BuiltIn SubgroupSize; + // uint32_t __spirv_BuiltIn SubgroupMaxSize; + // uint32_t __spirv_BuiltIn NumSubgroups; + // uint32_t __spirv_BuiltIn SubgroupId; + + // Replace the original scalar load with newly generated instruction. + assert(!LI->users().empty() && "Found a global load that is unused."); + if (SCall) { + LI->replaceAllUsesWith(SCall); + InstsToErase.push_back(LI); + return; + } + + // Replace the users of vector load. Each user is expected to be an element + // extract instruction. + for (User *LU : LI->users()) { + ExtractElementInst *EEI = dyn_cast(LU); + assert(EEI && "User of global var load must be an instruction."); + User *Inst = nullptr; + if (SpirvGlobalName == "WorkgroupSize" || + SpirvGlobalName == "LocalInvocationId" || + SpirvGlobalName == "NumWorkgroups") { + Inst = genExtractAndCast(VCall[0], EEI); + } else if (SpirvGlobalName == "GlobalInvocationId") { + // GlobalId = LocalId + WorkGroupSize * GroupId + Instruction *LocalIdI = genExtractAndCast(VCall[0], EEI); + Instruction *WGSizeI = genExtractAndCast(VCall[1], EEI); + Instruction *GroupIdI = addCastInstIfNeeded( + EEI, generateGenXCall(EEI, addGenXSuffix("group.id.", EEI), false)); + Instruction *MulI = + BinaryOperator::CreateMul(WGSizeI, GroupIdI, "mul", EEI); + Inst = BinaryOperator::CreateAdd(LocalIdI, MulI, "add", EEI); + } else if (SpirvGlobalName == "GlobalSize") { + // GlobalSize = WorkGroupSize * NumWorkGroups + Instruction *WGSizeI = genExtractAndCast(VCall[0], EEI); + Instruction *NumWGI = genExtractAndCast(VCall[1], EEI); + Inst = BinaryOperator::CreateMul(WGSizeI, NumWGI, "mul", EEI); + } else if (SpirvGlobalName == "GlobalOffset") { + // TODO: Support GlobalOffset SPIRV intrinsics + // Currently all users of load of GlobalOffset are replaced with 0. + Inst = llvm::Constant::getNullValue(EEI->getType()); + } else if (SpirvGlobalName == "WorkgroupId") { + Inst = addCastInstIfNeeded( + EEI, generateGenXCall(EEI, addGenXSuffix("group.id.", EEI), false)); + } + + assert(Inst && "Load from global SPIRV builtin was not translated"); + EEI->replaceAllUsesWith(Inst); + InstsToErase.push_back(EEI); + } + InstsToErase.push_back(LI); } static void createESIMDIntrinsicArgs(const ESIMDIntrinDesc &Desc, @@ -1369,8 +1394,7 @@ SmallPtrSet collectGenXVolatileTypes(Module &M) { } // namespace -PreservedAnalyses SYCLLowerESIMDPass::run(Module &M, - ModuleAnalysisManager &) { +PreservedAnalyses SYCLLowerESIMDPass::run(Module &M, ModuleAnalysisManager &) { generateKernelMetadata(M); SmallPtrSet GVTS = collectGenXVolatileTypes(M); @@ -1507,22 +1531,10 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F, auto PrefLen = StringRef(SPIRV_INTRIN_PREF).size(); // Go through all the uses of the load instruction from SPIRV builtin - // globals, which are required to be extractelement instructions. + // globals.which are required to be extractelement instructions. // Translate each of them. - for (auto *LU : LI->users()) { - auto *EEI = dyn_cast(LU); - assert(EEI && "User of load from global SPIRV builtin is not an " - "extractelement instruction"); - Value *TranslatedVal = translateSpirvGlobalUse( - EEI, SpirvGlobal->getName().drop_front(PrefLen)); - assert(TranslatedVal && - "Load from global SPIRV builtin was not translated"); - EEI->replaceAllUsesWith(TranslatedVal); - ESIMDToErases.push_back(EEI); - } - // After all users of load were translated, we get rid of the load - // itself. - ESIMDToErases.push_back(LI); + translateSpirvGlobalUse(LI, SpirvGlobal->getName().drop_front(PrefLen), + ESIMDToErases); } } // Now demangle and translate found ESIMD intrinsic calls diff --git a/sycl/test/esimd/spirv_intrins_trans.cpp b/sycl/test/esimd/spirv_intrins_trans.cpp index 5384e5dd31cfc..4bc363d3238ba 100644 --- a/sycl/test/esimd/spirv_intrins_trans.cpp +++ b/sycl/test/esimd/spirv_intrins_trans.cpp @@ -30,8 +30,8 @@ size_t caller() { }); // CHECK-LABEL: @{{.*}}kernel_GlobalInvocationId_x // CHECK: [[CALL_ESIMD1:%.*]] = call <3 x i32> @llvm.genx.local.id.v3i32() - // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 0 // CHECK: [[CALL_ESIMD2:%.*]] = call <3 x i32> @llvm.genx.local.size.v3i32() + // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 0 // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD2]], i32 0 // CHECK: {{.*}} call i32 @llvm.genx.group.id.x() @@ -40,8 +40,8 @@ size_t caller() { }); // CHECK-LABEL: @{{.*}}kernel_GlobalInvocationId_y // CHECK: [[CALL_ESIMD1:%.*]] = call <3 x i32> @llvm.genx.local.id.v3i32() - // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 1 // CHECK: [[CALL_ESIMD2:%.*]] = call <3 x i32> @llvm.genx.local.size.v3i32() + // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 1 // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD2]], i32 1 // CHECK: {{.*}} call i32 @llvm.genx.group.id.y() @@ -50,8 +50,8 @@ size_t caller() { }); // CHECK-LABEL: @{{.*}}kernel_GlobalInvocationId_z // CHECK: [[CALL_ESIMD1:%.*]] = call <3 x i32> @llvm.genx.local.id.v3i32() - // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 2 // CHECK: [[CALL_ESIMD2:%.*]] = call <3 x i32> @llvm.genx.local.size.v3i32() + // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 2 // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD2]], i32 2 // CHECK: {{.*}} call i32 @llvm.genx.group.id.z() @@ -60,8 +60,8 @@ size_t caller() { }); // CHECK-LABEL: @{{.*}}kernel_GlobalSize_x // CHECK: [[CALL_ESIMD1:%.*]] = call <3 x i32> @llvm.genx.local.size.v3i32() - // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 0 // CHECK: [[CALL_ESIMD2:%.*]] = call <3 x i32> @llvm.genx.group.count.v3i32() + // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 0 // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD2]], i32 0 kernel([=]() SYCL_ESIMD_KERNEL { @@ -69,8 +69,8 @@ size_t caller() { }); // CHECK-LABEL: @{{.*}}kernel_GlobalSize_y // CHECK: [[CALL_ESIMD1:%.*]] = call <3 x i32> @llvm.genx.local.size.v3i32() - // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 1 // CHECK: [[CALL_ESIMD2:%.*]] = call <3 x i32> @llvm.genx.group.count.v3i32() + // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 1 // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD2]], i32 1 kernel([=]() SYCL_ESIMD_KERNEL { @@ -78,8 +78,8 @@ size_t caller() { }); // CHECK-LABEL: @{{.*}}kernel_GlobalSize_z // CHECK: [[CALL_ESIMD1:%.*]] = call <3 x i32> @llvm.genx.local.size.v3i32() - // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 2 // CHECK: [[CALL_ESIMD2:%.*]] = call <3 x i32> @llvm.genx.group.count.v3i32() + // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 2 // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD2]], i32 2 kernel([=]() SYCL_ESIMD_KERNEL { @@ -187,8 +187,8 @@ size_t caller() { // addrspace(1)* @__spirv_BuiltInWorkgroupId to <3 x i64> addrspace(4)*), // align 32 %1 = extractelement <3 x i64> %0, i64 0 %2 = extractelement <3 // x i64> %0, i64 1 %3 = extractelement <3 x i64> %0, i64 2 - // In this case we will generate 3 calls to the same GenX intrinsic, - // But -early-cse will later remove this redundancy. + // In this case we will generate only 1 call to GenX intrinsic, and re-use + // it 3 times in extract element instructions. auto DoNotOptimizeXYZ = bufXYZ.get_access(cgh); kernel([=]() SYCL_ESIMD_KERNEL { @@ -199,10 +199,8 @@ size_t caller() { // CHECK-LABEL: @{{.*}}kernel_LocalInvocationId_xyz // CHECK: [[CALL_ESIMD1:%.*]] = call <3 x i32> @llvm.genx.local.id.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 0 - // CHECK: [[CALL_ESIMD2:%.*]] = call <3 x i32> @llvm.genx.local.id.v3i32() - // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD2]], i32 1 - // CHECK: [[CALL_ESIMD3:%.*]] = call <3 x i32> @llvm.genx.local.id.v3i32() - // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD3]], i32 2 + // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 1 + // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 2 kernel([=]() SYCL_ESIMD_KERNEL { DoNotOptimizeXYZ[0] = __spirv_WorkgroupId_x(); @@ -213,6 +211,13 @@ size_t caller() { // CHECK: {{.*}} call i32 @llvm.genx.group.id.x() // CHECK: {{.*}} call i32 @llvm.genx.group.id.y() // CHECK: {{.*}} call i32 @llvm.genx.group.id.z() + + kernel([=]() SYCL_ESIMD_KERNEL { + *DoNotOptimize.get_pointer() = __spirv_SubgroupLocalInvocationId(); + }); + // CHECK-LABEL: @{{.*}}kernel_SubgroupLocalInvocationId + // CHECK: [[ZEXT0:%.*]] = zext i32 0 to i64 + // CHECK: store i64 [[ZEXT0]] }); return DoNotOpt; } From b04643af764e12fbdaefae7897e6aa405ffdc4ce Mon Sep 17 00:00:00 2001 From: Vyacheslav N Klochkov Date: Wed, 6 Oct 2021 14:56:05 -0700 Subject: [PATCH 2/6] Minor fix to fix an error showing up only Release build mode Signed-off-by: Vyacheslav N Klochkov --- llvm/lib/SYCLLowerIR/LowerESIMD.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/SYCLLowerIR/LowerESIMD.cpp b/llvm/lib/SYCLLowerIR/LowerESIMD.cpp index 147dd9657b249..4bf91f7609f48 100644 --- a/llvm/lib/SYCLLowerIR/LowerESIMD.cpp +++ b/llvm/lib/SYCLLowerIR/LowerESIMD.cpp @@ -888,7 +888,7 @@ static uint64_t getIndexFromExtract(ExtractElementInst *EEI) { /// Extracts the index from the given extract element instruction \p EEI, /// and attaches the corresponding suffix (either "x", "y", or "z") to the given /// \p Name. -static StringRef addGenXSuffix(StringRef Name, ExtractElementInst *EEI) { +static std::string addGenXSuffix(StringRef Name, ExtractElementInst *EEI) { uint64_t IndexValue = getIndexFromExtract(EEI); return Twine(Name + Twine(static_cast('x' + IndexValue))).str(); } From 7d7ea9635c93ff3204b23cf482c53aebb6f22506 Mon Sep 17 00:00:00 2001 From: Vyacheslav N Klochkov Date: Thu, 7 Oct 2021 00:26:46 -0700 Subject: [PATCH 3/6] Partially revert the initial commit in this PR: keep the duplicates of GenX calls This commit also implements loads from SubgroupSize and SubgroupMaxSize SPIRV globals. Signed-off-by: Vyacheslav N Klochkov --- llvm/lib/SYCLLowerIR/LowerESIMD.cpp | 198 ++++++++++-------------- sycl/test/esimd/spirv_intrins_trans.cpp | 41 ++++- 2 files changed, 111 insertions(+), 128 deletions(-) diff --git a/llvm/lib/SYCLLowerIR/LowerESIMD.cpp b/llvm/lib/SYCLLowerIR/LowerESIMD.cpp index 4bf91f7609f48..5c8c83ada2b13 100644 --- a/llvm/lib/SYCLLowerIR/LowerESIMD.cpp +++ b/llvm/lib/SYCLLowerIR/LowerESIMD.cpp @@ -850,29 +850,6 @@ static Instruction *addCastInstIfNeeded(Instruction *OldI, Instruction *NewI) { return NewI; } -/// Generates the call of GenX intrinsic \p IntrinName and inserts it -/// right before the given instruction \p LI. If the parameter \p IsVectorCall -/// is set to true, then the GenX intrinsic returns a vector of 3 32-bit -/// integers. -static Instruction *generateGenXCall(Instruction *LI, StringRef IntrinName, - bool IsVectorCall) { - std::string IntrName = - (Twine(GenXIntrinsic::getGenXIntrinsicPrefix()) + Twine(IntrinName)) - .str(); - auto ID = GenXIntrinsic::lookupGenXIntrinsicID(IntrName); - Type *I32Ty = Type::getInt32Ty(LI->getModule()->getContext()); - Function *NewFDecl = - IsVectorCall ? GenXIntrinsic::getGenXDeclaration( - LI->getModule(), ID, FixedVectorType::get(I32Ty, 3)) - : GenXIntrinsic::getGenXDeclaration(LI->getModule(), ID); - - std::string ResultName = - (Twine(LI->getNameOrAsOperand()) + "." + IntrName).str(); - auto *Inst = IntrinsicInst::Create(NewFDecl, {}, ResultName, LI); - Inst->setDebugLoc(LI->getDebugLoc()); - return Inst; -} - /// Returns the index from the given extract element instruction \p EEI. /// It is checked here that the index is either 0, 1, or 2. static uint64_t getIndexFromExtract(ExtractElementInst *EEI) { @@ -885,127 +862,108 @@ static uint64_t getIndexFromExtract(ExtractElementInst *EEI) { return IndexValue; } -/// Extracts the index from the given extract element instruction \p EEI, -/// and attaches the corresponding suffix (either "x", "y", or "z") to the given -/// \p Name. -static std::string addGenXSuffix(StringRef Name, ExtractElementInst *EEI) { - uint64_t IndexValue = getIndexFromExtract(EEI); - return Twine(Name + Twine(static_cast('x' + IndexValue))).str(); -} - -/// Generates extractelement instruction associated with GenX intrinsic -/// returning a vector. It also generates required cast instructions. -/// The new instructions are insrted before the given extract element -/// instruction \p EEI. The index for the new extract element is copied -/// from the old extract element instruction. The instruction \p OpndI -/// specifies the variable from which the element must be extracted. -static Instruction *genExtractAndCast(Instruction *OpndI, - ExtractElementInst *EEI) { +/// Generates the call of GenX intrinsic \p IntrinName and inserts it +/// right before the given extract element instruction \p EEI using the result +/// of vector load. The parameter \p IsVectorCall tells what version of GenX +/// intrinsic (scalar or vector) to use to lower the load from SPIRV global. +static Instruction *generateGenXCall(ExtractElementInst *EEI, + StringRef IntrinName, bool IsVectorCall) { uint64_t IndexValue = getIndexFromExtract(EEI); - + std::string Suffix = + IsVectorCall + ? ".v3i32" + : (Twine(".") + Twine(static_cast('x' + IndexValue))).str(); + std::string FullIntrinName = (Twine(GenXIntrinsic::getGenXIntrinsicPrefix()) + + Twine(IntrinName) + Suffix) + .str(); + auto ID = GenXIntrinsic::lookupGenXIntrinsicID(FullIntrinName); Type *I32Ty = Type::getInt32Ty(EEI->getModule()->getContext()); - std::string ExtractName = - (Twine(EEI->getNameOrAsOperand()) + ".ext." + Twine(IndexValue)).str(); - Instruction *ExtrI = ExtractElementInst::Create( - OpndI, ConstantInt::get(I32Ty, IndexValue), ExtractName, EEI); - ExtrI->setDebugLoc(EEI->getDebugLoc()); + Function *NewFDecl = + IsVectorCall ? GenXIntrinsic::getGenXDeclaration( + EEI->getModule(), ID, FixedVectorType::get(I32Ty, 3)) + : GenXIntrinsic::getGenXDeclaration(EEI->getModule(), ID); - return addCastInstIfNeeded(EEI, ExtrI); + std::string ResultName = + (Twine(EEI->getNameOrAsOperand()) + "." + FullIntrinName).str(); + Instruction *Inst = IntrinsicInst::Create(NewFDecl, {}, ResultName, EEI); + Inst->setDebugLoc(EEI->getDebugLoc()); + + if (IsVectorCall) { + Type *I32Ty = Type::getInt32Ty(EEI->getModule()->getContext()); + std::string ExtractName = + (Twine(Inst->getNameOrAsOperand()) + ".ext." + Twine(IndexValue)).str(); + Inst = ExtractElementInst::Create(Inst, ConstantInt::get(I32Ty, IndexValue), + ExtractName, EEI); + Inst->setDebugLoc(EEI->getDebugLoc()); + } + Inst = addCastInstIfNeeded(EEI, Inst); + return Inst; } /// Replaces the load \p LI of SPIRV global with corresponding call(s) of GenX -/// intrinsic(s). The users of LI may also be transformed if needed for def/use -/// type correctness. +/// intrinsic(s). The users of \p LI may also be transformed if needed for +/// def/use type correctness. /// The replaced instructions are stored into the given container /// \p InstsToErase. static void -translateSpirvGlobalUse(LoadInst *LI, StringRef SpirvGlobalName, - SmallVectorImpl &InstsToErase) { - // Translate the original load from SPIRV global first. - // Generate either a) some calls of vector GenX intrinsics to replace - // the original vector load + extracts, or b) one scalar call replacing - // the original scalar call here and translate/convert the users of the - // SPIRV global later. - SmallVector VCall; - Value *SCall = nullptr; - if (SpirvGlobalName == "WorkgroupSize") { - VCall.push_back(generateGenXCall(LI, "local.size.v3i32", true)); - } else if (SpirvGlobalName == "LocalInvocationId") { - VCall.push_back(generateGenXCall(LI, "local.id.v3i32", true)); - } else if (SpirvGlobalName == "NumWorkgroups") { - VCall.push_back(generateGenXCall(LI, "group.count.v3i32", true)); - } else if (SpirvGlobalName == "GlobalInvocationId") { - // Special case: GlobalId = LocalId + WorkGroupSize * GroupId - // Call LocalId and WorkgroupSize here to replace the original load - // and do the rest of computations when lower the uses. - VCall.push_back(generateGenXCall(LI, "local.id.v3i32", true)); - VCall.push_back(generateGenXCall(LI, "local.size.v3i32", true)); - } else if (SpirvGlobalName == "GlobalSize") { - // Special case: GlobalSize = WorkGroupSize * NumWorkGroups - VCall.push_back(generateGenXCall(LI, "local.size.v3i32", true)); - VCall.push_back(generateGenXCall(LI, "group.count.v3i32", true)); - } else if (SpirvGlobalName == "WorkgroupId") { - // GenX does not provide a vector intrinsic for this vector global. - // So, proceed to lowering of users of the load and generate specialized - // version of GenX intrinsic, e.g. group.id.x or group.id.y - } else if (SpirvGlobalName == "SubgroupLocalInvocationId") { - // Subgroup local id always returns 0 for ESIMD. - SCall = llvm::Constant::getNullValue(LI->getType()); - } else if (SpirvGlobalName == "GlobalOffset") { - // TODO: Support GlobalOffset SPIRV intrinsics - // Currently, GlobalOffset always returns 0. - // Just proceed to lowering of users of load from GlobalOffset global. - } - - // TODO: Also, implement support for the following intrinsics: - // uint32_t __spirv_BuiltIn SubgroupSize; - // uint32_t __spirv_BuiltIn SubgroupMaxSize; +translateSpirvGlobalUses(LoadInst *LI, StringRef SpirvGlobalName, + SmallVectorImpl &InstsToErase) { + // TODO: Implement support for the following intrinsics: // uint32_t __spirv_BuiltIn NumSubgroups; // uint32_t __spirv_BuiltIn SubgroupId; - // Replace the original scalar load with newly generated instruction. - assert(!LI->users().empty() && "Found a global load that is unused."); - if (SCall) { - LI->replaceAllUsesWith(SCall); + // Translate the loads from _scalar_ SPIRV globals in the next block. + // Such globals require the replacement of the load only because the users + // may have any kind/opcode and we do not even try replacing the users here. + Value *NewInst = nullptr; + if (SpirvGlobalName == "SubgroupLocalInvocationId") { + NewInst = llvm::Constant::getNullValue(LI->getType()); + } else if (SpirvGlobalName == "SubgroupSize" || + SpirvGlobalName == "SubgroupMaxSize") { + NewInst = llvm::Constant::getIntegerValue(LI->getType(), + llvm::APInt(32, 1, true)); + } + if (NewInst) { + LI->replaceAllUsesWith(NewInst); InstsToErase.push_back(LI); return; } - // Replace the users of vector load. Each user is expected to be an element - // extract instruction. + // Only loads from _vector_ SPIRV globals reach here. Replace their users now. for (User *LU : LI->users()) { ExtractElementInst *EEI = dyn_cast(LU); - assert(EEI && "User of global var load must be an instruction."); - User *Inst = nullptr; - if (SpirvGlobalName == "WorkgroupSize" || - SpirvGlobalName == "LocalInvocationId" || - SpirvGlobalName == "NumWorkgroups") { - Inst = genExtractAndCast(VCall[0], EEI); + assert(EEI && "User of load from vector SPIRV global must be an extract"); + NewInst = nullptr; + + if (SpirvGlobalName == "WorkgroupSize") { + NewInst = generateGenXCall(EEI, "local.size", true); + } else if (SpirvGlobalName == "LocalInvocationId") { + NewInst = generateGenXCall(EEI, "local.id", true); + } else if (SpirvGlobalName == "WorkgroupId") { + NewInst = generateGenXCall(EEI, "group.id", false); } else if (SpirvGlobalName == "GlobalInvocationId") { // GlobalId = LocalId + WorkGroupSize * GroupId - Instruction *LocalIdI = genExtractAndCast(VCall[0], EEI); - Instruction *WGSizeI = genExtractAndCast(VCall[1], EEI); - Instruction *GroupIdI = addCastInstIfNeeded( - EEI, generateGenXCall(EEI, addGenXSuffix("group.id.", EEI), false)); + Instruction *LocalIdI = generateGenXCall(EEI, "local.id", true); + Instruction *WGSizeI = generateGenXCall(EEI, "local.size", true); + Instruction *GroupIdI = generateGenXCall(EEI, "group.id", false); Instruction *MulI = BinaryOperator::CreateMul(WGSizeI, GroupIdI, "mul", EEI); - Inst = BinaryOperator::CreateAdd(LocalIdI, MulI, "add", EEI); + NewInst = BinaryOperator::CreateAdd(LocalIdI, MulI, "add", EEI); } else if (SpirvGlobalName == "GlobalSize") { // GlobalSize = WorkGroupSize * NumWorkGroups - Instruction *WGSizeI = genExtractAndCast(VCall[0], EEI); - Instruction *NumWGI = genExtractAndCast(VCall[1], EEI); - Inst = BinaryOperator::CreateMul(WGSizeI, NumWGI, "mul", EEI); + Instruction *WGSizeI = generateGenXCall(EEI, "local.size", true); + Instruction *NumWGI = generateGenXCall(EEI, "group.count", true); + NewInst = BinaryOperator::CreateMul(WGSizeI, NumWGI, "mul", EEI); } else if (SpirvGlobalName == "GlobalOffset") { // TODO: Support GlobalOffset SPIRV intrinsics // Currently all users of load of GlobalOffset are replaced with 0. - Inst = llvm::Constant::getNullValue(EEI->getType()); - } else if (SpirvGlobalName == "WorkgroupId") { - Inst = addCastInstIfNeeded( - EEI, generateGenXCall(EEI, addGenXSuffix("group.id.", EEI), false)); + NewInst = llvm::Constant::getNullValue(EEI->getType()); + } else if (SpirvGlobalName == "NumWorkgroups") { + NewInst = generateGenXCall(EEI, "group.count", true); } - assert(Inst && "Load from global SPIRV builtin was not translated"); - EEI->replaceAllUsesWith(Inst); + assert(NewInst && "Load from global SPIRV builtin was not translated"); + EEI->replaceAllUsesWith(NewInst); InstsToErase.push_back(EEI); } InstsToErase.push_back(LI); @@ -1530,11 +1488,11 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F, auto PrefLen = StringRef(SPIRV_INTRIN_PREF).size(); - // Go through all the uses of the load instruction from SPIRV builtin - // globals.which are required to be extractelement instructions. - // Translate each of them. - translateSpirvGlobalUse(LI, SpirvGlobal->getName().drop_front(PrefLen), - ESIMDToErases); + // Translate all uses of the load instruction from SPIRV builtin global. + // Replaces the original global load and it is uses and stores the old + // instructions to ESIMDToErases. + translateSpirvGlobalUses(LI, SpirvGlobal->getName().drop_front(PrefLen), + ESIMDToErases); } } // Now demangle and translate found ESIMD intrinsic calls diff --git a/sycl/test/esimd/spirv_intrins_trans.cpp b/sycl/test/esimd/spirv_intrins_trans.cpp index 4bc363d3238ba..809238059eb52 100644 --- a/sycl/test/esimd/spirv_intrins_trans.cpp +++ b/sycl/test/esimd/spirv_intrins_trans.cpp @@ -18,20 +18,23 @@ size_t caller() { size_t DoNotOpt; cl::sycl::buffer buf(&DoNotOpt, 1); + uint32_t DoNotOpt32; + cl::sycl::buffer buf32(&DoNotOpt32, 1); size_t DoNotOptXYZ[3]; cl::sycl::buffer bufXYZ(&DoNotOptXYZ[0], sycl::range<1>(3)); cl::sycl::queue().submit([&](cl::sycl::handler &cgh) { auto DoNotOptimize = buf.get_access(cgh); + auto DoNotOptimize32 = buf32.get_access(cgh); kernel([=]() SYCL_ESIMD_KERNEL { *DoNotOptimize.get_pointer() = __spirv_GlobalInvocationId_x(); }); // CHECK-LABEL: @{{.*}}kernel_GlobalInvocationId_x // CHECK: [[CALL_ESIMD1:%.*]] = call <3 x i32> @llvm.genx.local.id.v3i32() - // CHECK: [[CALL_ESIMD2:%.*]] = call <3 x i32> @llvm.genx.local.size.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 0 + // CHECK: [[CALL_ESIMD2:%.*]] = call <3 x i32> @llvm.genx.local.size.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD2]], i32 0 // CHECK: {{.*}} call i32 @llvm.genx.group.id.x() @@ -40,8 +43,8 @@ size_t caller() { }); // CHECK-LABEL: @{{.*}}kernel_GlobalInvocationId_y // CHECK: [[CALL_ESIMD1:%.*]] = call <3 x i32> @llvm.genx.local.id.v3i32() - // CHECK: [[CALL_ESIMD2:%.*]] = call <3 x i32> @llvm.genx.local.size.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 1 + // CHECK: [[CALL_ESIMD2:%.*]] = call <3 x i32> @llvm.genx.local.size.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD2]], i32 1 // CHECK: {{.*}} call i32 @llvm.genx.group.id.y() @@ -50,8 +53,8 @@ size_t caller() { }); // CHECK-LABEL: @{{.*}}kernel_GlobalInvocationId_z // CHECK: [[CALL_ESIMD1:%.*]] = call <3 x i32> @llvm.genx.local.id.v3i32() - // CHECK: [[CALL_ESIMD2:%.*]] = call <3 x i32> @llvm.genx.local.size.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 2 + // CHECK: [[CALL_ESIMD2:%.*]] = call <3 x i32> @llvm.genx.local.size.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD2]], i32 2 // CHECK: {{.*}} call i32 @llvm.genx.group.id.z() @@ -60,8 +63,8 @@ size_t caller() { }); // CHECK-LABEL: @{{.*}}kernel_GlobalSize_x // CHECK: [[CALL_ESIMD1:%.*]] = call <3 x i32> @llvm.genx.local.size.v3i32() - // CHECK: [[CALL_ESIMD2:%.*]] = call <3 x i32> @llvm.genx.group.count.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 0 + // CHECK: [[CALL_ESIMD2:%.*]] = call <3 x i32> @llvm.genx.group.count.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD2]], i32 0 kernel([=]() SYCL_ESIMD_KERNEL { @@ -69,8 +72,8 @@ size_t caller() { }); // CHECK-LABEL: @{{.*}}kernel_GlobalSize_y // CHECK: [[CALL_ESIMD1:%.*]] = call <3 x i32> @llvm.genx.local.size.v3i32() - // CHECK: [[CALL_ESIMD2:%.*]] = call <3 x i32> @llvm.genx.group.count.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 1 + // CHECK: [[CALL_ESIMD2:%.*]] = call <3 x i32> @llvm.genx.group.count.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD2]], i32 1 kernel([=]() SYCL_ESIMD_KERNEL { @@ -78,8 +81,8 @@ size_t caller() { }); // CHECK-LABEL: @{{.*}}kernel_GlobalSize_z // CHECK: [[CALL_ESIMD1:%.*]] = call <3 x i32> @llvm.genx.local.size.v3i32() - // CHECK: [[CALL_ESIMD2:%.*]] = call <3 x i32> @llvm.genx.group.count.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 2 + // CHECK: [[CALL_ESIMD2:%.*]] = call <3 x i32> @llvm.genx.group.count.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD2]], i32 2 kernel([=]() SYCL_ESIMD_KERNEL { @@ -199,8 +202,10 @@ size_t caller() { // CHECK-LABEL: @{{.*}}kernel_LocalInvocationId_xyz // CHECK: [[CALL_ESIMD1:%.*]] = call <3 x i32> @llvm.genx.local.id.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 0 - // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 1 - // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 2 + // CHECK: [[CALL_ESIMD2:%.*]] = call <3 x i32> @llvm.genx.local.id.v3i32() + // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD2]], i32 1 + // CHECK: [[CALL_ESIMD3:%.*]] = call <3 x i32> @llvm.genx.local.id.v3i32() + // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD3]], i32 2 kernel([=]() SYCL_ESIMD_KERNEL { DoNotOptimizeXYZ[0] = __spirv_WorkgroupId_x(); @@ -214,10 +219,30 @@ size_t caller() { kernel([=]() SYCL_ESIMD_KERNEL { *DoNotOptimize.get_pointer() = __spirv_SubgroupLocalInvocationId(); + *DoNotOptimize32.get_pointer() = __spirv_SubgroupLocalInvocationId() + 3; }); // CHECK-LABEL: @{{.*}}kernel_SubgroupLocalInvocationId // CHECK: [[ZEXT0:%.*]] = zext i32 0 to i64 // CHECK: store i64 [[ZEXT0]] + // CHECK: add i32 0, 3 + + kernel([=]() SYCL_ESIMD_KERNEL { + *DoNotOptimize.get_pointer() = __spirv_SubgroupSize(); + *DoNotOptimize32.get_pointer() = __spirv_SubgroupSize() + 7; + }); + // CHECK-LABEL: @{{.*}}kernel_SubgroupSize + // CHECK: [[ZEXT0:%.*]] = zext i32 1 to i64 + // CHECK: store i64 [[ZEXT0]] + // CHECK: add i32 1, 7 + + kernel([=]() SYCL_ESIMD_KERNEL { + *DoNotOptimize.get_pointer() = __spirv_SubgroupMaxSize(); + *DoNotOptimize32.get_pointer() = __spirv_SubgroupMaxSize() + 9; + }); + // CHECK-LABEL: @{{.*}}kernel_SubgroupMaxSize + // CHECK: [[ZEXT0:%.*]] = zext i32 1 to i64 + // CHECK: store i64 [[ZEXT0]] + // CHECK: add i32 1, 9 }); return DoNotOpt; } From eae34f7d2a3cddf9662ecc65047fdc93f51152b7 Mon Sep 17 00:00:00 2001 From: Vyacheslav N Klochkov Date: Thu, 7 Oct 2021 11:23:55 -0700 Subject: [PATCH 4/6] [NFC] - revert the comment change in LIT test Signed-off-by: Vyacheslav N Klochkov --- sycl/test/esimd/spirv_intrins_trans.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/test/esimd/spirv_intrins_trans.cpp b/sycl/test/esimd/spirv_intrins_trans.cpp index 809238059eb52..7f7da671e01fb 100644 --- a/sycl/test/esimd/spirv_intrins_trans.cpp +++ b/sycl/test/esimd/spirv_intrins_trans.cpp @@ -190,8 +190,8 @@ size_t caller() { // addrspace(1)* @__spirv_BuiltInWorkgroupId to <3 x i64> addrspace(4)*), // align 32 %1 = extractelement <3 x i64> %0, i64 0 %2 = extractelement <3 // x i64> %0, i64 1 %3 = extractelement <3 x i64> %0, i64 2 - // In this case we will generate only 1 call to GenX intrinsic, and re-use - // it 3 times in extract element instructions. + // In this case we will generate 3 calls to the same GenX intrinsic, + // But -early-cse will later remove this redundancy. auto DoNotOptimizeXYZ = bufXYZ.get_access(cgh); kernel([=]() SYCL_ESIMD_KERNEL { From d5dbf7b37e0de18a45361c1fa8e9510f553b8d3d Mon Sep 17 00:00:00 2001 From: Vyacheslav N Klochkov Date: Fri, 8 Oct 2021 18:56:18 -0700 Subject: [PATCH 5/6] Minor fixes to address reviewer's comments Signed-off-by: Vyacheslav N Klochkov --- llvm/lib/SYCLLowerIR/LowerESIMD.cpp | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/llvm/lib/SYCLLowerIR/LowerESIMD.cpp b/llvm/lib/SYCLLowerIR/LowerESIMD.cpp index 8e5fe609aca05..c1df763575961 100644 --- a/llvm/lib/SYCLLowerIR/LowerESIMD.cpp +++ b/llvm/lib/SYCLLowerIR/LowerESIMD.cpp @@ -43,6 +43,8 @@ namespace id = itanium_demangle; #define SLM_BTI 254 +#define MAX_DIMS 3 + namespace { SmallPtrSet collectGenXVolatileTypes(Module &); void generateKernelMetadata(Module &); @@ -855,10 +857,8 @@ static Instruction *addCastInstIfNeeded(Instruction *OldI, Instruction *NewI) { /// It is checked here that the index is either 0, 1, or 2. static uint64_t getIndexFromExtract(ExtractElementInst *EEI) { Value *IndexV = EEI->getIndexOperand(); - assert(isa(IndexV) && - "Expected a const index in extract element instruction"); uint64_t IndexValue = cast(IndexV)->getZExtValue(); - assert(IndexValue <= 2 && + assert(IndexValue < MAX_DIMS && "Extract element index should be either 0, 1, or 2"); return IndexValue; } @@ -881,7 +881,7 @@ static Instruction *generateGenXCall(ExtractElementInst *EEI, Type *I32Ty = Type::getInt32Ty(EEI->getModule()->getContext()); Function *NewFDecl = IsVectorCall ? GenXIntrinsic::getGenXDeclaration( - EEI->getModule(), ID, FixedVectorType::get(I32Ty, 3)) + EEI->getModule(), ID, FixedVectorType::get(I32Ty, MAX_DIMS)) : GenXIntrinsic::getGenXDeclaration(EEI->getModule(), ID); std::string ResultName = @@ -913,9 +913,10 @@ translateSpirvGlobalUses(LoadInst *LI, StringRef SpirvGlobalName, // uint32_t __spirv_BuiltIn NumSubgroups; // uint32_t __spirv_BuiltIn SubgroupId; - // Translate the loads from _scalar_ SPIRV globals in the next block. - // Such globals require the replacement of the load only because the users - // may have any kind/opcode and we do not even try replacing the users here. + // Translate those loads from _scalar_ SPIRV globals that can be replaced with + // a const value here. + // The loads from other scalar SPIRV globals may require insertion of GenX calls + // before each user, which is done in the loop by users of 'LI' below. Value *NewInst = nullptr; if (SpirvGlobalName == "SubgroupLocalInvocationId") { NewInst = llvm::Constant::getNullValue(LI->getType()); @@ -930,10 +931,13 @@ translateSpirvGlobalUses(LoadInst *LI, StringRef SpirvGlobalName, return; } - // Only loads from _vector_ SPIRV globals reach here. Replace their users now. + // Only loads from _vector_ SPIRV globals reach here now. Their users are + // expected to be ExtractElementInst only, and they are replaced in this loop. + // When loads from _scalar_ SPIRV globals are handled here as well, the users + // will not be replaced by new instructions, but the GenX call replacing the + // original load 'LI' should be inserted before each user. for (User *LU : LI->users()) { - ExtractElementInst *EEI = dyn_cast(LU); - assert(EEI && "User of load from vector SPIRV global must be an extract"); + ExtractElementInst *EEI = cast(LU); NewInst = nullptr; if (SpirvGlobalName == "WorkgroupSize") { From 4e9dcaa77d49ad1cc688762effa223055f6e2bc6 Mon Sep 17 00:00:00 2001 From: Vyacheslav N Klochkov Date: Fri, 8 Oct 2021 18:59:21 -0700 Subject: [PATCH 6/6] clang-format fixes Signed-off-by: Vyacheslav N Klochkov --- llvm/lib/SYCLLowerIR/LowerESIMD.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/llvm/lib/SYCLLowerIR/LowerESIMD.cpp b/llvm/lib/SYCLLowerIR/LowerESIMD.cpp index c1df763575961..00679c6fd79fb 100644 --- a/llvm/lib/SYCLLowerIR/LowerESIMD.cpp +++ b/llvm/lib/SYCLLowerIR/LowerESIMD.cpp @@ -880,9 +880,10 @@ static Instruction *generateGenXCall(ExtractElementInst *EEI, auto ID = GenXIntrinsic::lookupGenXIntrinsicID(FullIntrinName); Type *I32Ty = Type::getInt32Ty(EEI->getModule()->getContext()); Function *NewFDecl = - IsVectorCall ? GenXIntrinsic::getGenXDeclaration( - EEI->getModule(), ID, FixedVectorType::get(I32Ty, MAX_DIMS)) - : GenXIntrinsic::getGenXDeclaration(EEI->getModule(), ID); + IsVectorCall + ? GenXIntrinsic::getGenXDeclaration( + EEI->getModule(), ID, FixedVectorType::get(I32Ty, MAX_DIMS)) + : GenXIntrinsic::getGenXDeclaration(EEI->getModule(), ID); std::string ResultName = (Twine(EEI->getNameOrAsOperand()) + "." + FullIntrinName).str(); @@ -915,8 +916,8 @@ translateSpirvGlobalUses(LoadInst *LI, StringRef SpirvGlobalName, // Translate those loads from _scalar_ SPIRV globals that can be replaced with // a const value here. - // The loads from other scalar SPIRV globals may require insertion of GenX calls - // before each user, which is done in the loop by users of 'LI' below. + // The loads from other scalar SPIRV globals may require insertion of GenX + // calls before each user, which is done in the loop by users of 'LI' below. Value *NewInst = nullptr; if (SpirvGlobalName == "SubgroupLocalInvocationId") { NewInst = llvm::Constant::getNullValue(LI->getType());